// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. // /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX Amd64/x86 Code Generator XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" #ifdef _MSC_VER #pragma hdrstop #endif #ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator. #ifdef _TARGET_XARCH_ #include "emit.h" #include "codegen.h" #include "lower.h" #include "gcinfo.h" #include "gcinfoencoder.h" // Get the register assigned to the given node regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree) { return tree->gtRegNum; } //------------------------------------------------------------------------ // genSpillVar: Spill a local variable // // Arguments: // tree - the lclVar node for the variable being spilled // // Return Value: // None. // // Assumptions: // The lclVar must be a register candidate (lvRegCandidate) void CodeGen::genSpillVar(GenTreePtr tree) { unsigned varNum = tree->gtLclVarCommon.gtLclNum; LclVarDsc * varDsc = &(compiler->lvaTable[varNum]); assert(varDsc->lvIsRegCandidate()); // We don't actually need to spill if it is already living in memory bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg()); if (needsSpill) { var_types lclTyp = varDsc->TypeGet(); if (varDsc->lvNormalizeOnStore()) lclTyp = genActualType(lclTyp); emitAttr size = emitTypeSize(lclTyp); bool restoreRegVar = false; if (tree->gtOper == GT_REG_VAR) { tree->SetOper(GT_LCL_VAR); restoreRegVar = true; } // mask off the flag to generate the right spill code, then bring it back tree->gtFlags &= ~GTF_REG_VAL; instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum)); if (varTypeIsMultiReg(tree)) { assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair)); assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair)); regNumber regLo = genRegPairLo(tree->gtRegPair); regNumber regHi = genRegPairHi(tree->gtRegPair); inst_TT_RV(storeIns, tree, regLo); inst_TT_RV(storeIns, tree, regHi, 4); } else { assert(varDsc->lvRegNum == tree->gtRegNum); inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size); } tree->gtFlags |= GTF_REG_VAL; if (restoreRegVar) { tree->SetOper(GT_REG_VAR); } genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex)) { #ifdef DEBUG if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) { JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum); } else { JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum); } #endif VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); } } tree->gtFlags &= ~GTF_SPILL; varDsc->lvRegNum = REG_STK; if (varTypeIsMultiReg(tree)) { varDsc->lvOtherReg = REG_STK; } } // inline void CodeGenInterface::genUpdateVarReg(LclVarDsc * varDsc, GenTreePtr tree) { assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY)); varDsc->lvRegNum = tree->gtRegNum; } /*****************************************************************************/ 
/*****************************************************************************/ /***************************************************************************** * * Generate code that will set the given register to the integer constant. */ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags) { // Reg cannot be a FP reg assert(!genIsValidFloatReg(reg)); // The only TYP_REF constant that can come this path is a managed 'null' since it is not // relocatable. Other ref type constants (e.g. string objects) go through a different // code path. noway_assert(type != TYP_REF || val == 0); if (val == 0) { instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags); } else { // TODO-XArch-CQ: needs all the optimized cases getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val); } } /***************************************************************************** * * Generate code to check that the GS cookie wasn't thrashed by a buffer * overrun. If pushReg is true, preserve all registers around code sequence. * Otherwise ECX could be modified. * * Implementation Note: pushReg = true, in case of tail calls. */ void CodeGen::genEmitGSCookieCheck(bool pushReg) { noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal); // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while // executing GS cookie check will not collect the object pointed to by EAX. if (!pushReg && (compiler->info.compRetType == TYP_REF)) gcInfo.gcRegGCrefSetCur |= RBM_INTRET; regNumber regGSCheck; if (!pushReg) { // Non-tail call: we can use any callee trash register that is not // a return register or contain 'this' pointer (keep alive this), since // we are generating GS cookie check after a GT_RETURN block. if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister && (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX)) { regGSCheck = REG_RDX; } else { regGSCheck = REG_RCX; } } else { #ifdef _TARGET_X86_ NYI_X86("Tail calls from methods that need GS check"); regGSCheck = REG_NA; #else // !_TARGET_X86_ // Tail calls from methods that need GS check: We need to preserve registers while // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie // check, we might need a register. This won't be an issue for jmp calls for the // reason mentioned below (see comment starting with "Jmp Calls:"). // // The following are the possible solutions in case of tail prefixed calls: // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when // present in methods that require GS cookie check. Rest of the tail calls that // do not require R11 will be honored. // 2) Internal register - GT_CALL node reserves an internal register and emits GS // cookie check as part of tail call codegen. GenExitCode() needs to special case // fast tail calls implemented as epilog+jmp or such tail calls should always get // dispatched via helper. // 3) Materialize GS cookie check as a sperate node hanging off GT_CALL node in // right execution order during rationalization. // // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed // VSD calls from methods that need GS check. // // Tail prefixed calls: Right now for Jit64 compat, method requiring GS cookie check // ignores tail prefix. 
In future, if we intend to support tail calls from such a method, // consider one of the options mentioned above. For now adding an assert that we don't // expect to see a tail call in a method that requires GS check. noway_assert(!compiler->compTailCallUsed); // Jmp calls: specify method handle using which JIT queries VM for its entry point // address and hence it can neither be a VSD call nor PInvoke calli with cookie // parameter. Therefore, in case of jmp calls it is safe to use R11. regGSCheck = REG_R11; #endif // !_TARGET_X86_ } if (compiler->gsGlobalSecurityCookieAddr == nullptr) { // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'. // Otherwise, load the value into a reg and use 'cmp mem64, reg64'. if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal) { genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); } else { getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0, (int)compiler->gsGlobalSecurityCookieVal); } } else { // Ngen case - GS cookie value needs to be accessed through an indirection. instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr); getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0); getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0); } BasicBlock *gsCheckBlk = genCreateTempLabel(); inst_JMP(genJumpKindForOper(GT_EQ, true), gsCheckBlk); genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN); genDefineTempLabel(gsCheckBlk); } /***************************************************************************** * * Generate code for all the basic blocks in the function. */ void CodeGen::genCodeForBBlist() { unsigned varNum; LclVarDsc * varDsc; unsigned savedStkLvl; #ifdef DEBUG genInterruptibleUsed = true; unsigned stmtNum = 0; UINT64 totalCostEx = 0; UINT64 totalCostSz = 0; // You have to be careful if you create basic blocks from now on compiler->fgSafeBasicBlockCreation = false; // This stress mode is not comptible with fully interruptible GC if (genInterruptible && compiler->opts.compStackCheckOnCall) { compiler->opts.compStackCheckOnCall = false; } // This stress mode is not comptible with fully interruptible GC if (genInterruptible && compiler->opts.compStackCheckOnRet) { compiler->opts.compStackCheckOnRet = false; } #endif // DEBUG // Prepare the blocks for exception handling codegen: mark the blocks that needs labels. genPrepForEHCodegen(); assert(!compiler->fgFirstBBScratch || compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first. /* Initialize the spill tracking logic */ regSet.rsSpillBeg(); /* Initialize the line# tracking logic */ #ifdef DEBUGGING_SUPPORT if (compiler->opts.compScopeInfo) { siInit(); } #endif // The current implementation of switch tables requires the first block to have a label so it // can generate offsets to the switch label targets. // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this. 
if (compiler->fgHasSwitch) { compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET; } genPendingCallLabel = nullptr; /* Initialize the pointer tracking code */ gcInfo.gcRegPtrSetInit(); gcInfo.gcVarPtrSetInit(); /* If any arguments live in registers, mark those regs as such */ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++ , varDsc++) { /* Is this variable a parameter assigned to a register? */ if (!varDsc->lvIsParam || !varDsc->lvRegister) continue; /* Is the argument live on entry to the method? */ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)) continue; /* Is this a floating-point argument? */ if (varDsc->IsFloatRegType()) continue; noway_assert(!varTypeIsFloating(varDsc->TypeGet())); /* Mark the register as holding the variable */ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum); } unsigned finallyNesting = 0; // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without // allocation at the start of each basic block. VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler)); /*------------------------------------------------------------------------- * * Walk the basic blocks and generate code for each one * */ BasicBlock * block; BasicBlock * lblk; /* previous block */ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext) { #ifdef DEBUG if (compiler->verbose) { printf("\n=============== Generating "); block->dspBlockHeader(compiler, true, true); compiler->fgDispBBLiveness(block); } #endif // DEBUG /* Figure out which registers hold variables on entry to this block */ regSet.rsMaskVars = RBM_NONE; gcInfo.gcRegGCrefSetCur = RBM_NONE; gcInfo.gcRegByrefSetCur = RBM_NONE; compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block); genUpdateLife(block->bbLiveIn); // Even if liveness didn't change, we need to update the registers containing GC references. // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change? // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here. // That would require handling the changes in recordVarLocationsAtStartOfBB(). 
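        // The loop below rebuilds these sets from bbLiveIn: for each live-in tracked variable
        // that is currently in a register, its register is added to newLiveRegSet (and to
        // newRegGCrefSet or newRegByrefSet if it holds a GC ref or byref), and the variable is
        // removed from gcVarPtrSetCur; GC-tracked variables that live on the stack are added
        // to gcVarPtrSetCur instead.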
regMaskTP newLiveRegSet = RBM_NONE; regMaskTP newRegGCrefSet = RBM_NONE; regMaskTP newRegByrefSet = RBM_NONE; #ifdef DEBUG VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler)); VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler)); #endif VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex); while (iter.NextElem(compiler, &varIndex)) { unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; LclVarDsc* varDsc = &(compiler->lvaTable[varNum]); if (varDsc->lvIsInReg()) { newLiveRegSet |= varDsc->lvRegMask(); if (varDsc->lvType == TYP_REF) { newRegGCrefSet |= varDsc->lvRegMask(); } else if (varDsc->lvType == TYP_BYREF) { newRegByrefSet |= varDsc->lvRegMask(); } #ifdef DEBUG if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) { VarSetOps::AddElemD(compiler, removedGCVars, varIndex); } #endif // DEBUG VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); } else if (compiler->lvaIsGCTracked(varDsc)) { #ifdef DEBUG if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) { VarSetOps::AddElemD(compiler, addedGCVars, varIndex); } #endif // DEBUG VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); } } #ifdef DEBUG if (compiler->verbose) { printf("\t\t\t\t\t\t\tLive regs: "); if (regSet.rsMaskVars == newLiveRegSet) { printf("(unchanged) "); } else { printRegMaskInt(regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); printf(" => "); } printRegMaskInt(newLiveRegSet); compiler->getEmitter()->emitDispRegSet(newLiveRegSet); printf("\n"); if (!VarSetOps::IsEmpty(compiler, addedGCVars)) { printf("\t\t\t\t\t\t\tAdded GCVars: "); dumpConvertedVarSet(compiler, addedGCVars); printf("\n"); } if (!VarSetOps::IsEmpty(compiler, removedGCVars)) { printf("\t\t\t\t\t\t\tRemoved GCVars: "); dumpConvertedVarSet(compiler, removedGCVars); printf("\n"); } } #endif // DEBUG regSet.rsMaskVars = newLiveRegSet; gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUG_ARG(true)); gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true)); /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to represent the exception object (TYP_REF). We mark REG_EXCEPTION_OBJECT as holding a GC object on entry to the block, it will be the first thing evaluated (thanks to GTF_ORDER_SIDEEFF). 
*/ if (handlerGetsXcptnObj(block->bbCatchTyp)) { #if JIT_FEATURE_SSA_SKIP_DEFS GenTreePtr firstStmt = block->FirstNonPhiDef(); #else GenTreePtr firstStmt = block->bbTreeList; #endif if (firstStmt != NULL) { GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr; if (compiler->gtHasCatchArg(firstTree)) { gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT); } } } /* Start a new code output block */ genUpdateCurrentFunclet(block); if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD) { getEmitter()->emitLoopAlign(); } #ifdef DEBUG if (compiler->opts.dspCode) printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum); #endif block->bbEmitCookie = NULL; if (block->bbFlags & (BBF_JMP_TARGET|BBF_HAS_LABEL)) { /* Mark a label and update the current set of live GC refs */ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, FALSE); } if (block == compiler->fgFirstColdBlock) { #ifdef DEBUG if (compiler->verbose) { printf("\nThis is the start of the cold region of the method\n"); } #endif // We should never have a block that falls through into the Cold section noway_assert(!lblk->bbFallsThrough()); // We require the block that starts the Cold section to have a label noway_assert(block->bbEmitCookie); getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie); } /* Both stacks are always empty on entry to a basic block */ genStackLevel = 0; savedStkLvl = genStackLevel; /* Tell everyone which basic block we're working on */ compiler->compCurBB = block; #ifdef DEBUGGING_SUPPORT siBeginBlock(block); // BBF_INTERNAL blocks don't correspond to any single IL instruction. if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to emit a NO_MAPPING entry, immediately after the prolog. { genIPmappingAdd((IL_OFFSETX) ICorDebugInfo::NO_MAPPING, true); } bool firstMapping = true; #endif // DEBUGGING_SUPPORT /*--------------------------------------------------------------------- * * Generate code for each statement-tree in the block * */ #if FEATURE_EH_FUNCLETS if (block->bbFlags & BBF_FUNCLET_BEG) { genReserveFuncletProlog(block); } #endif // FEATURE_EH_FUNCLETS for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext) { noway_assert(stmt->gtOper == GT_STMT); if (stmt->AsStmt()->gtStmtIsEmbedded()) continue; /* Get hold of the statement tree */ GenTreePtr tree = stmt->gtStmt.gtStmtExpr; #if defined(DEBUGGING_SUPPORT) /* Do we have a new IL-offset ? */ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET) { /* Create and append a new IP-mapping entry */ genIPmappingAdd(stmt->gtStmt.gtStmt.gtStmtILoffsx, firstMapping); firstMapping = false; } #endif // DEBUGGING_SUPPORT #ifdef DEBUG noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize || stmt->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET); if (compiler->opts.dspCode && compiler->opts.dspInstrs && stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET) { while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs) { genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> "); } } stmtNum++; if (compiler->verbose) { printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum); printf("Holding variables: "); dspRegMask(regSet.rsMaskVars); printf("\n\n"); if (compiler->verboseTrees) { compiler->gtDispTree(compiler->opts.compDbgInfo ? 
stmt : tree); printf("\n"); } } totalCostEx += ((UINT64)stmt->gtCostEx * block->getBBWeight(compiler)); totalCostSz += (UINT64) stmt->gtCostSz; #endif // DEBUG // Traverse the tree in linear order, generating code for each node in the // tree as we encounter it compiler->compCurLifeTree = NULL; compiler->compCurStmt = stmt; for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext) { genCodeForTreeNode(treeNode); if (treeNode->gtHasReg() && treeNode->gtLsraInfo.isLocalDefUse) { genConsumeReg(treeNode); } } #ifdef FEATURE_SIMD // If the next statement expr is a SIMDIntrinsicUpperRestore, don't call rsSpillChk because we // haven't yet restored spills from the most recent call. GenTree* nextTopLevelStmt = stmt->AsStmt()->gtStmtNextTopLevelStmt(); if ((nextTopLevelStmt == nullptr) || (nextTopLevelStmt->AsStmt()->gtStmtExpr->OperGet() != GT_SIMD) || (nextTopLevelStmt->AsStmt()->gtStmtExpr->gtSIMD.gtSIMDIntrinsicID != SIMDIntrinsicUpperRestore)) #endif // FEATURE_SIMD { regSet.rsSpillChk(); } #ifdef DEBUG /* Make sure we didn't bungle pointer register tracking */ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur); regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars; // If return is a GC-type, clear it. Note that if a common // epilog is generated (genReturnBB) it has a void return // even though we might return a ref. We can't use the compRetType // as the determiner because something we are tracking as a byref // might be used as a return value of a int function (which is legal) if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) || (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet())))) { nonVarPtrRegs &= ~RBM_INTRET; } // When profiling, the first statement in a catch block will be the // harmless "inc" instruction (does not interfere with the exception // object). if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) && handlerGetsXcptnObj(block->bbCatchTyp)) { nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT; } if (nonVarPtrRegs) { printf("Regset after tree="); compiler->printTreeID(tree); printf(" BB%02u gcr=", block->bbNum); printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars); printf(", byr="); printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars); printf(", regVars="); printRegMaskInt(regSet.rsMaskVars); compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars); printf("\n"); } noway_assert(nonVarPtrRegs == 0); for (GenTree * node = stmt->gtStmt.gtStmtList; node; node=node->gtNext) { assert(!(node->gtFlags & GTF_SPILL)); } #endif // DEBUG noway_assert(stmt->gtOper == GT_STMT); #ifdef DEBUGGING_SUPPORT genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx); #endif } //-------- END-FOR each statement-tree of the current block --------- #if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_) if (block->bbNext == nullptr) { // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block // (it's as good as any, but better than the prolog, which can only be a single instruction // group) then use COMPLUS_JitLateDisasm=* to see if the late disassembler // thinks the instructions are the same as we do. 
genAmd64EmitterUnitTests(); } #endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_ARM64_) #ifdef DEBUGGING_SUPPORT if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0)) { siEndBlock(block); /* Is this the last block, and are there any open scopes left ? */ bool isLastBlockProcessed = (block->bbNext == NULL); if (block->isBBCallAlwaysPair()) { isLastBlockProcessed = (block->bbNext->bbNext == NULL); } if (isLastBlockProcessed && siOpenScopeList.scNext) { /* This assert no longer holds, because we may insert a throw block to demarcate the end of a try or finally region when they are at the end of the method. It would be nice if we could fix our code so that this throw block will no longer be necessary. */ //noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize); siCloseAllOpenScopes(); } } #endif // DEBUGGING_SUPPORT genStackLevel -= savedStkLvl; #ifdef DEBUG // compCurLife should be equal to the liveOut set, except that we don't keep // it up to date for vars that are not register candidates // (it would be nice to have a xor set function) VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife)); VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut)); VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex); while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex)) { unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex]; LclVarDsc * varDsc = compiler->lvaTable + varNum; assert(!varDsc->lvIsRegCandidate()); } #endif /* Both stacks should always be empty on exit from a basic block */ noway_assert(genStackLevel == 0); #ifdef _TARGET_AMD64_ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region. // The document "X64 and ARM ABIs.docx" has more details. The situations: // 1. If the call instruction is in a different EH region as the instruction that follows it. // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.) // We handle case #1 here, and case #2 in the emitter. if (getEmitter()->emitIsLastInsCall()) { // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold? // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically, // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions // generated before the OS epilog starts, such as a GS cookie check. if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { // We only need the NOP if we're not going to generate any more code as part of the block end. switch (block->bbJumpKind) { case BBJ_ALWAYS: case BBJ_THROW: case BBJ_CALLFINALLY: case BBJ_EHCATCHRET: // We're going to generate more code below anyway, so no need for the NOP. case BBJ_RETURN: case BBJ_EHFINALLYRET: case BBJ_EHFILTERRET: // These are the "epilog follows" case, handled in the emitter. 
break; case BBJ_NONE: if (block->bbNext == nullptr) { // Call immediately before the end of the code; we should never get here . instGen(INS_BREAKPOINT); // This should never get executed } else { // We need the NOP instGen(INS_nop); } break; case BBJ_COND: case BBJ_SWITCH: // These can't have a call as the last instruction! default: noway_assert(!"Unexpected bbJumpKind"); break; } } } #endif // _TARGET_AMD64_ /* Do we need to generate a jump or return? */ switch (block->bbJumpKind) { case BBJ_ALWAYS: inst_JMP(EJ_jmp, block->bbJumpDest); break; case BBJ_RETURN: genExitCode(block); break; case BBJ_THROW: // If we have a throw at the end of a function or funclet, we need to emit another instruction // afterwards to help the OS unwinder determine the correct context during unwind. // We insert an unexecuted breakpoint instruction in several situations // following a throw instruction: // 1. If the throw is the last instruction of the function or funclet. This helps // the OS unwinder determine the correct context during an unwind from the // thrown exception. // 2. If this is this is the last block of the hot section. // 3. If the subsequent block is a special throw block. // 4. On AMD64, if the next block is in a different EH region. if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) || !BasicBlock::sameEHRegion(block, block->bbNext) || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) || block->bbNext == compiler->fgFirstColdBlock ) { instGen(INS_BREAKPOINT); // This should never get executed } break; case BBJ_CALLFINALLY: #if FEATURE_EH_FUNCLETS // Generate a call to the finally, like this: // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym // call finally-funclet // jmp finally-return // Only for non-retless finally calls // The jmp can be a NOP if we're going to the next block. getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_RCX, compiler->lvaPSPSym, 0); getEmitter()->emitIns_J(INS_call, block->bbJumpDest); if (block->bbFlags & BBF_RETLESS_CALL) { // We have a retless call, and the last instruction generated was a call. // If the next block is in a different EH region (or is the end of the code // block), then we need to generate a breakpoint here (since it will never // get executed) to get proper unwind behavior. if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext)) { instGen(INS_BREAKPOINT); // This should never get executed } } else { // Because of the way the flowgraph is connected, the liveness info for this one instruction // after the call is not (can not be) correct in cases where a variable has a last use in the // handler. So turn off GC reporting for this single instruction. getEmitter()->emitMakeRemainderNonInterruptible(); // Now go to where the finally funclet needs to return to. if (block->bbNext->bbJumpDest == block->bbNext->bbNext) { // Fall-through. // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly // to the next instruction? This would depend on stack walking from within the finally // handler working without this instruction being in this special EH region. instGen(INS_nop); } else { inst_JMP(EJ_jmp, block->bbNext->bbJumpDest); } } // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the // jump target using bbJumpDest - that is already used to point // to the finally block. So just skip past the BBJ_ALWAYS unless the // block is RETLESS. 
if ( !(block->bbFlags & BBF_RETLESS_CALL) ) { assert(block->isBBCallAlwaysPair()); lblk = block; block = block->bbNext; } #else // !FEATURE_EH_FUNCLETS NYI_X86("EH for RyuJIT x86"); #endif // !FEATURE_EH_FUNCLETS break; case BBJ_EHCATCHRET: // Set EAX to the address the VM should return to after the catch. // Generate a RIP-relative // lea reg, [rip + disp32] ; the RIP is implicit // which will be position-indepenent. // TODO-XArch-Bug?: For ngen, we need to generate a reloc for the displacement (maybe EA_PTR_DSP_RELOC). getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET); __fallthrough; case BBJ_EHFINALLYRET: case BBJ_EHFILTERRET: #if FEATURE_EH_FUNCLETS genReserveFuncletEpilog(block); #else // !FEATURE_EH_FUNCLETS NYI_X86("EH for RyuJIT x86"); #endif // !FEATURE_EH_FUNCLETS break; case BBJ_NONE: case BBJ_COND: case BBJ_SWITCH: break; default: noway_assert(!"Unexpected bbJumpKind"); break; } #ifdef DEBUG compiler->compCurBB = 0; #endif } //------------------ END-FOR each block of the method ------------------- /* Nothing is live at this point */ genUpdateLife(VarSetOps::MakeEmpty(compiler)); /* Finalize the spill tracking logic */ regSet.rsSpillEnd(); /* Finalize the temp tracking logic */ compiler->tmpEnd(); #ifdef DEBUG if (compiler->verbose) { printf("\n# "); printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz); printf("%s\n", compiler->info.compFullName); } #endif } // return the child that has the same reg as the dst (if any) // other child returned (out param) in 'other' GenTree * sameRegAsDst(GenTree *tree, GenTree *&other /*out*/) { if (tree->gtRegNum == REG_NA) { other = nullptr; return NULL; } GenTreePtr op1 = tree->gtOp.gtOp1; GenTreePtr op2 = tree->gtOp.gtOp2; if (op1->gtRegNum == tree->gtRegNum) { other = op2; return op1; } if (op2->gtRegNum == tree->gtRegNum) { other = op1; return op2; } else { other = nullptr; return NULL; } } // move an immediate value into an integer register void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags) { // reg cannot be a FP register assert(!genIsValidFloatReg(reg)); if (!compiler->opts.compReloc) { size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs } if ((imm == 0) && !EA_IS_RELOC(size)) { instGen_Set_Reg_To_Zero(size, reg, flags); } else { if (genAddrShouldUsePCRel(imm)) { getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm); } else { getEmitter()->emitIns_R_I(INS_mov, size, reg, imm); } } regTracker.rsTrackRegIntCns(reg, imm); } /*********************************************************************************** * * Generate code to set a register 'targetReg' of type 'targetType' to the constant * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call * genProduceReg() on the target register. 
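 *
 * Note: for GT_CNS_DBL, a +0.0 constant is generated with a reg,reg xor (see the comment in
 * the GT_CNS_DBL case below), while any other float/double constant is loaded from a
 * read-only constant created via genMakeConst.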
*/ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree) { switch (tree->gtOper) { case GT_CNS_INT: { // relocatable values tend to come down as a CNS_INT of native int type // so the line between these two opcodes is kind of blurry GenTreeIntConCommon* con = tree->AsIntConCommon(); ssize_t cnsVal = con->IconValue(); bool needReloc = compiler->opts.compReloc && tree->IsIconHandle(); if (needReloc) { instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal); regTracker.rsTrackRegTrash(targetReg); } else { genSetRegToIcon(targetReg, cnsVal, targetType); } } break; case GT_CNS_DBL: { double constValue = tree->gtDblCon.gtDconVal; // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0) if (*(__int64*)&constValue == 0) { // A faster/smaller way to generate 0 instruction ins = genGetInsForOper(GT_XOR, targetType); inst_RV_RV(ins, targetReg, targetReg, targetType); } else { GenTreePtr cns; if (targetType == TYP_FLOAT) { float f = forceCastToFloat(constValue); cns = genMakeConst(&f, targetType, tree, false); } else { cns = genMakeConst(&constValue, targetType, tree, true); } inst_RV_TT(ins_Load(targetType), targetReg, cns); } } break; default: unreached(); } } // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { assert(!(treeNode->gtFlags & GTF_UNSIGNED)); assert(!treeNode->gtOverflowEx()); regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); emitAttr size = emitTypeSize(treeNode); GenTree *op1 = treeNode->gtOp.gtOp1; GenTree *op2 = treeNode->gtOp.gtOp2; // to get the high bits of the multiply, we are constrained to using the // 1-op form: RDX:RAX = RAX * rm // The 3-op form (Rx=Ry*Rz) does not support it. genConsumeOperands(treeNode->AsOp()); GenTree* regOp = op1; GenTree* rmOp = op2; // Set rmOp to the contained memory operand (if any) // if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg))) { regOp = op2; rmOp = op1; } assert(!regOp->isContained()); // Setup targetReg when neither of the source operands was a matching register if (regOp->gtRegNum != targetReg) { inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType); } emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp); // Move the result to the desired register, if necessary if (targetReg != REG_RDX) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } } // generate code for a DIV or MOD operation // void CodeGen::genCodeForDivMod(GenTreeOp* treeNode) { GenTree *dividend = treeNode->gtOp1; GenTree *divisor = treeNode->gtOp2; genTreeOps oper = treeNode->OperGet(); emitAttr size = emitTypeSize(treeNode); regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); // dividend is not contained. 
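    // Rough sketch of the integer div/mod sequence emitted below (illustrative only; the exact
    // registers and operand sizes depend on the node):
    //     mov   rax, <dividend>      ; dividend must be in RAX
    //     cdq / xor edx, edx         ; sign- or zero-extend the dividend into RDX
    //     idiv / div <divisor>       ; quotient -> RAX, remainder -> RDX
    //     mov   <targetReg>, rax/rdx ; only if the target register differs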
assert(!dividend->isContained()); genConsumeOperands(treeNode->AsOp()); if (varTypeIsFloating(targetType)) { // divisor is not contained or if contained is a memory op assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl()); // Floating point div/rem operation assert(oper == GT_DIV || oper == GT_MOD); if (dividend->gtRegNum == targetReg) { emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor); } else if (divisor->gtRegNum == targetReg) { // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2 // because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64 // LSRA has to make sure that such a register assignment is not generated for floating // point div/rem operations. noway_assert(!"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment"); } else { inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType); emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor); } } else { // dividend must be in RAX if (dividend->gtRegNum != REG_RAX) inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType); // zero or sign extend rax to rdx if (oper == GT_UMOD || oper == GT_UDIV) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX); } else { emit->emitIns(INS_cdq, size); // the cdq instruction writes RDX, So clear the gcInfo for RDX gcInfo.gcMarkRegSetNpt(RBM_RDX); } if (divisor->isContainedIntOrIImmed()) { GenTreeIntConCommon* divImm = divisor->AsIntConCommon(); assert(divImm->IsIntCnsFitsInI32()); ssize_t imm = divImm->IconValue(); assert(isPow2(abs(imm))); genCodeForPow2Div(treeNode->AsOp()); } else { // Perform the 'targetType' (64-bit or 32-bit) divide instruction instruction ins; if (oper == GT_UMOD || oper == GT_UDIV) ins = INS_div; else ins = INS_idiv; emit->emitInsBinary(ins, size, treeNode, divisor); // Signed divide RDX:RAX by r/m64, with result // stored in RAX := Quotient, RDX := Remainder. // Move the result to the desired register, if necessary if (oper == GT_DIV || oper == GT_UDIV) { if (targetReg != REG_RAX) { inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); } } else { assert((oper == GT_MOD) || (oper == GT_UMOD)); if (targetReg != REG_RDX) { inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType); } } } } genProduceReg(treeNode); } // Generate code for ADD, SUB, AND XOR, and OR. // mul and div variants have special constraints on x64 so are not handled here. 
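// The operand cases handled below, in order:
//   reg1 = reg1 op reg2        - emit "op reg1, reg2" directly
//   reg1 = reg2 op reg1        - only legal for commutative operators; swap dst and src
//   reg3 = reg1 + (reg2|imm)   - use LEA when no flags or overflow check are needed
//   reg3 = reg1 op reg2        - "mov reg3, reg1" followed by "op reg3, reg2"
// (a non-overflow integer add of +1/-1 into the target register is emitted as inc/dec)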
void CodeGen::genCodeForBinary(GenTree* treeNode) { const genTreeOps oper = treeNode->OperGet(); regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); assert (oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB); GenTreePtr op1 = treeNode->gtGetOp1(); GenTreePtr op2 = treeNode->gtGetOp2(); instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); // The arithmetic node must be sitting in a register (since it's not contained) noway_assert(targetReg != REG_NA); regNumber op1reg = op1->gtRegNum; regNumber op2reg = op2->gtRegNum; GenTreePtr dst; GenTreePtr src; genConsumeOperands(treeNode->AsOp()); // This is the case of reg1 = reg1 op reg2 // We're ready to emit the instruction without any moves if (op1reg == targetReg) { dst = op1; src = op2; } // We have reg1 = reg2 op reg1 // In order for this operation to be correct // we need that op is a commutative operation so // we can convert it into reg1 = reg1 op reg2 and emit // the same code as above else if (op2reg == targetReg) { noway_assert(GenTree::OperIsCommutative(oper)); dst = op2; src = op1; } // now we know there are 3 different operands so attempt to use LEA else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags && (op2->isContainedIntOrIImmed() || !op2->isContained()) ) { if (op2->isContainedIntOrIImmed()) { emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, (int) op2->AsIntConCommon()->IconValue()); } else { assert(op2reg != REG_NA); emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0); } genProduceReg(treeNode); return; } // dest, op1 and op2 registers are different: // reg3 = reg1 op reg2 // We can implement this by issuing a mov: // reg3 = reg1 // reg3 = reg3 op reg2 else { inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType); regTracker.rsTrackRegCopy(targetReg, op1reg); gcInfo.gcMarkRegPtrVal(targetReg, targetType); dst = treeNode; src = op2; } // try to use an inc or dec if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx()) { if (src->gtIntConCommon.IconValue() == 1) { emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg); genProduceReg(treeNode); return; } else if (src->gtIntConCommon.IconValue() == -1) { emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg); genProduceReg(treeNode); return; } } regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src); noway_assert(r == targetReg); if (treeNode->gtOverflowEx()) { assert(oper == GT_ADD || oper == GT_SUB); genCheckOverflow(treeNode); } genProduceReg(treeNode); } /***************************************************************************** * * Generate code for a single node in the tree. * Preconditions: All operands have been evaluated * */ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode) { regNumber targetReg; #if !defined(_TARGET_64BIT_) if (treeNode->TypeGet() == TYP_LONG) { // All long enregistered nodes will have been decomposed into their // constituent lo and hi nodes. 
regPairNo targetPair = treeNode->gtRegPair; noway_assert(targetPair == REG_PAIR_NONE); targetReg = REG_NA; } else #endif // !defined(_TARGET_64BIT_) { targetReg = treeNode->gtRegNum; } var_types targetType = treeNode->TypeGet(); emitter *emit = getEmitter(); #ifdef DEBUG if (compiler->verbose) { unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio printf("Generating: "); compiler->gtDispTree(treeNode, nullptr, nullptr, true); } #endif // DEBUG // Is this a node whose value is already in a register? LSRA denotes this by // setting the GTF_REUSE_REG_VAL flag. if (treeNode->IsReuseRegVal()) { // For now, this is only used for constant nodes. assert((treeNode->OperIsConst())); JITDUMP(" TreeNode is marked ReuseReg\n"); return; } // contained nodes are part of their parents for codegen purposes // ex : immediates, most LEAs if (treeNode->isContained()) { return; } switch (treeNode->gtOper) { case GT_START_NONGC: getEmitter()->emitMakeRemainderNonInterruptible(); break; case GT_PROF_HOOK: #ifdef PROFILING_SUPPORTED // We should be seeing this only if profiler hook is needed noway_assert(compiler->compIsProfilerHookNeeded()); // Right now this node is used only for tail calls. In future if // we intend to use it for Enter or Leave hooks, add a data member // to this node indicating the kind of profiler hook. For example, // helper number can be used. genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); #endif // PROFILING_SUPPORTED break; case GT_LCLHEAP: genLclHeap(treeNode); break; case GT_CNS_INT: case GT_CNS_DBL: genSetRegToConst(targetReg, targetType, treeNode); genProduceReg(treeNode); break; case GT_NEG: case GT_NOT: if (varTypeIsFloating(targetType)) { assert(treeNode->gtOper == GT_NEG); genSSE2BitwiseOp(treeNode); } else { GenTreePtr operand = treeNode->gtGetOp1(); assert(!operand->isContained()); regNumber operandReg = genConsumeReg(operand); if (operandReg != targetReg) { inst_RV_RV(INS_mov, targetReg, operandReg, targetType); } instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); inst_RV(ins, targetReg, targetType); } genProduceReg(treeNode); break; case GT_OR: case GT_XOR: case GT_AND: assert(varTypeIsIntegralOrI(treeNode)); __fallthrough; case GT_ADD: case GT_SUB: genCodeForBinary(treeNode); break; case GT_LSH: case GT_RSH: case GT_RSZ: genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode); // genCodeForShift() calls genProduceReg() break; case GT_CAST: #if !defined(_TARGET_64BIT_) // We will NYI in DecomposeNode() if we are cast TO a long type, but we do not // yet support casting FROM a long type either, and that's simpler to catch // here. 
NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG"); #endif // !defined(_TARGET_64BIT_) if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1)) { // Casts float/double <--> double/float genFloatToFloatCast(treeNode); } else if (varTypeIsFloating(treeNode->gtOp.gtOp1)) { // Casts float/double --> int32/int64 genFloatToIntCast(treeNode); } else if (varTypeIsFloating(targetType)) { // Casts int32/uint32/int64/uint64 --> float/double genIntToFloatCast(treeNode); } else { // Casts int <--> int genIntToIntCast(treeNode); } // The per-case functions call genProduceReg() break; case GT_LCL_VAR: { // lcl_vars are not defs assert((treeNode->gtFlags & GTF_VAR_DEF) == 0); GenTreeLclVarCommon *lcl = treeNode->AsLclVarCommon(); bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate(); if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH)) { assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED)); } // If this is a register candidate that has been spilled, genConsumeReg() will // reload it at the point of use. Otherwise, if it's not in a register, we load it here. if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED)) { assert(!isRegCandidate); emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0); genProduceReg(treeNode); } } break; case GT_LCL_FLD_ADDR: case GT_LCL_VAR_ADDR: { // Address of a local var. This by itself should never be allocated a register. // If it is worth storing the address in a register then it should be cse'ed into // a temp and that would be allocated a register. noway_assert(targetType == TYP_BYREF); noway_assert(!treeNode->InReg()); inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF); } genProduceReg(treeNode); break; case GT_LCL_FLD: { noway_assert(targetType != TYP_STRUCT); noway_assert(treeNode->gtRegNum != REG_NA); #ifdef FEATURE_SIMD // Loading of TYP_SIMD12 (i.e. Vector3) field if (treeNode->TypeGet() == TYP_SIMD12) { genLoadLclFldTypeSIMD12(treeNode); break; } #endif emitAttr size = emitTypeSize(targetType); unsigned offs = treeNode->gtLclFld.gtLclOffs; unsigned varNum = treeNode->gtLclVarCommon.gtLclNum; assert(varNum < compiler->lvaCount); emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs); } genProduceReg(treeNode); break; case GT_STORE_LCL_FLD: { noway_assert(targetType != TYP_STRUCT); noway_assert(!treeNode->InReg()); assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); #ifdef FEATURE_SIMD // storing of TYP_SIMD12 (i.e. Vector3) field if (treeNode->TypeGet() == TYP_SIMD12) { genStoreLclFldTypeSIMD12(treeNode); break; } #endif GenTreePtr op1 = treeNode->gtOp.gtOp1; genConsumeRegs(op1); emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1); } break; case GT_STORE_LCL_VAR: { noway_assert(targetType != TYP_STRUCT); assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum; LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]); // Ensure that lclVar nodes are typed correctly. 
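            // (a small-int local that is normalized on store must be stored through a node of
            // its widened/actual type, i.e. TYP_INT)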
assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet())); #if !defined(_TARGET_64BIT_) if (treeNode->TypeGet() == TYP_LONG) { genStoreLongLclVar(treeNode); break; } #endif // !defined(_TARGET_64BIT_) GenTreePtr op1 = treeNode->gtOp.gtOp1; genConsumeRegs(op1); if (treeNode->gtRegNum == REG_NA) { // stack store emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode); varDsc->lvRegNum = REG_STK; } else { bool containedOp1 = op1->isContained(); // Look for the case where we have a constant zero which we've marked for reuse, // but which isn't actually in the register we want. In that case, it's better to create // zero in the target register, because an xor is smaller than a copy. Note that we could // potentially handle this in the register allocator, but we can't always catch it there // because the target may not have a register allocated for it yet. if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) && op1->IsZero()) { op1->gtRegNum = REG_NA; op1->ResetReuseRegVal(); containedOp1 = true; } if (containedOp1) { // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register // must be a constant. However, in the future we might want to support a contained memory op. // This is a bit tricky because we have to decide it's contained before register allocation, // and this would be a case where, once that's done, we need to mark that node as always // requiring a register - which we always assume now anyway, but once we "optimize" that // we'll have to take cases like this into account. assert((op1->gtRegNum == REG_NA) && op1->OperIsConst()); genSetRegToConst(treeNode->gtRegNum, targetType, op1); } else if (op1->gtRegNum != treeNode->gtRegNum) { assert(op1->gtRegNum != REG_NA); emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1); } } if (treeNode->gtRegNum != REG_NA) genProduceReg(treeNode); } break; case GT_RETFILT: // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in // the return register, if it's not already there. The processing is the same as GT_RETURN. if (targetType != TYP_VOID) { // For filters, the IL spec says the result is type int32. Further, the only specified legal values // are 0 or 1, with the use of other values "undefined". assert(targetType == TYP_INT); } __fallthrough; case GT_RETURN: { GenTreePtr op1 = treeNode->gtOp.gtOp1; if (targetType == TYP_VOID) { assert(op1 == nullptr); } #if !defined(_TARGET_64BIT_) else if (treeNode->TypeGet() == TYP_LONG) { assert(op1 != nullptr); noway_assert(op1->OperGet() == GT_LONG); GenTree* loRetVal = op1->gtGetOp1(); GenTree* hiRetVal = op1->gtGetOp2(); noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA)); genConsumeReg(loRetVal); genConsumeReg(hiRetVal); if (loRetVal->gtRegNum != REG_LNGRET_LO) { inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT); } if (hiRetVal->gtRegNum != REG_LNGRET_HI) { inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT); } } #endif // !defined(_TARGET_64BIT_) else { assert(op1 != nullptr); noway_assert(op1->gtRegNum != REG_NA); // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has // consumed a reg for the operand. This is because the variable // is dead after return. But we are issuing more instructions // like "profiler leave callback" after this consumption. 
So // if you are issuing more instructions after this point, // remember to keep the variable live up until the new method // exit point where it is actually dead. genConsumeReg(op1); regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; #ifdef _TARGET_X86_ if (varTypeIsFloating(treeNode)) { if (genIsRegCandidateLocal(op1) && !compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegister) { // Store local variable to its home location, if necessary. if ((op1->gtFlags & GTF_REG_VAL) != 0) { op1->gtFlags &= ~GTF_REG_VAL; inst_TT_RV(ins_Store(op1->gtType, compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)), op1, op1->gtRegNum); } // Now, load it to the fp stack. getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0); } else { // Spill the value, which should be in a register, then load it to the fp stack. // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet). op1->gtFlags |= GTF_SPILL; regSet.rsSpillTree(op1->gtRegNum, op1); op1->gtFlags |= GTF_SPILLED; op1->gtFlags &= ~GTF_SPILL; TempDsc* t = regSet.rsUnspillInPlace(op1); inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0); op1->gtFlags &= ~GTF_SPILLED; compiler->tmpRlsTemp(t); } } else #endif // _TARGET_X86_ if (op1->gtRegNum != retReg) { inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType); } } #ifdef PROFILING_SUPPORTED // There will be a single return block while generating profiler ELT callbacks. // // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN: // In flowgraph and other places assert that the last node of a block marked as // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to // maintain such an invariant irrespective of whether profiler hook needed or not. // Also, there is not much to be gained by materializing it as an explicit node. if (compiler->compCurBB == compiler->genReturnBB) { // !! NOTE !! // Since we are invalidating the assumption that we would slip into the epilog // right after the "return", we need to preserve the return reg's GC state // across the call until actual method return. if (varTypeIsGC(compiler->info.compRetType)) { gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType); } genProfilingLeaveCallback(); if (varTypeIsGC(compiler->info.compRetType)) { gcInfo.gcMarkRegSetNpt(REG_INTRET); } } #endif } break; case GT_LEA: { // if we are here, it is the case where there is an LEA that cannot // be folded into a parent instruction GenTreeAddrMode *lea = treeNode->AsAddrMode(); genLeaInstruction(lea); } // genLeaInstruction calls genProduceReg() break; case GT_IND: #ifdef FEATURE_SIMD // Handling of Vector3 type values loaded through indirection. 
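        // (a TYP_SIMD12 value is only 12 bytes, so it is not loaded with a single 16-byte move,
        // which could read past the end of the object; genLoadIndTypeSIMD12 assembles it from
        // smaller loads instead)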
if (treeNode->TypeGet() == TYP_SIMD12) { genLoadIndTypeSIMD12(treeNode); break; } #endif // FEATURE_SIMD genConsumeAddress(treeNode->AsIndir()->Addr()); emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode); genProduceReg(treeNode); break; case GT_MULHI: genCodeForMulHi(treeNode->AsOp()); genProduceReg(treeNode); break; case GT_MUL: { instruction ins; emitAttr size = emitTypeSize(treeNode); bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0); bool requiresOverflowCheck = treeNode->gtOverflowEx(); // TODO-XArch-CQ: use LEA for mul by imm GenTree *op1 = treeNode->gtOp.gtOp1; GenTree *op2 = treeNode->gtOp.gtOp2; // there are 3 forms of x64 multiply: // 1-op form with 128 result: RDX:RAX = RAX * rm // 2-op form: reg *= rm // 3-op form: reg = rm * imm genConsumeOperands(treeNode->AsOp()); // This matches the 'mul' lowering in Lowering::SetMulOpCounts() // // immOp :: Only one operand can be an immediate // rmOp :: Only operand can be a memory op. // regOp :: A register op (especially the operand that matches 'targetReg') // (can be nullptr when we have both a memory op and an immediate op) GenTree * immOp = nullptr; GenTree * rmOp = op1; GenTree * regOp; if (op2->isContainedIntOrIImmed()) { immOp = op2; } else if (op1->isContainedIntOrIImmed()) { immOp = op1; rmOp = op2; } if (immOp != nullptr) { // This must be a non-floating point operation. assert(!varTypeIsFloating(treeNode)); // use the 3-op form with immediate ins = getEmitter()->inst3opImulForReg(targetReg); emit->emitInsBinary(ins, size, rmOp, immOp); } else // we have no contained immediate operand { regOp = op1; rmOp = op2; regNumber mulTargetReg = targetReg; if (isUnsignedMultiply && requiresOverflowCheck) { ins = INS_mulEAX; mulTargetReg = REG_RAX; } else { ins = genGetInsForOper(GT_MUL, targetType); } // Set rmOp to the contain memory operand (if any) // or set regOp to the op2 when it has the matching target register for our multiply op // if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg))) { regOp = op2; rmOp = op1; } assert(!regOp->isContained()); // Setup targetReg when neither of the source operands was a matching register if (regOp->gtRegNum != mulTargetReg) { inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType); } emit->emitInsBinary(ins, size, treeNode, rmOp); // Move the result to the desired register, if necessary if ((ins == INS_mulEAX) && (targetReg != REG_RAX)) { inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); } } if (requiresOverflowCheck) { // Overflow checking is only used for non-floating point types noway_assert(!varTypeIsFloating(treeNode)); genCheckOverflow(treeNode); } } genProduceReg(treeNode); break; case GT_MOD: case GT_UDIV: case GT_UMOD: // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD // on float/double args. noway_assert(!varTypeIsFloating(treeNode)); __fallthrough; case GT_DIV: genCodeForDivMod(treeNode->AsOp()); break; case GT_MATH: genMathIntrinsic(treeNode); break; #ifdef FEATURE_SIMD case GT_SIMD: genSIMDIntrinsic(treeNode->AsSIMD()); break; #endif // FEATURE_SIMD case GT_CKFINITE: genCkfinite(treeNode); break; case GT_EQ: case GT_NE: case GT_LT: case GT_LE: case GT_GE: case GT_GT: { // TODO-XArch-CQ: Check if we can use the currently set flags. 
            // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
            //                (signed < or >= where targetReg != REG_NA)

            GenTreeOp *tree = treeNode->AsOp();
            GenTreePtr op1 = tree->gtOp1;
            GenTreePtr op2 = tree->gtOp2;
            var_types op1Type = op1->TypeGet();
            var_types op2Type = op2->TypeGet();

#if !defined(_TARGET_64BIT_)
            NYI_IF(varTypeIsLong(op1Type) || varTypeIsLong(op2Type), "Comparison of longs");
#endif // !defined(_TARGET_64BIT_)

            genConsumeOperands(tree);

            instruction ins;
            emitAttr cmpAttr;
            if (varTypeIsFloating(op1Type))
            {
                // The SSE2 instruction ucomis[s|d] performs an unordered comparison and
                // updates the rFLAGS register as follows.
                //        Result of compare       ZF  PF  CF
                //        -----------------       ----------
                //        Unordered                1   1   1     <-- this result implies one of operands of compare is a NAN.
                //        Greater                  0   0   0
                //        Less Than                0   0   1
                //        Equal                    1   0   0
                //
                // From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
                // unordered comparison of floating point values. That is *.UN comparisons result in true when
                // one of the operands is a NaN whereas ordered comparisons result in false.
                //
                //    Opcode          Amd64 equivalent         Comment
                //    ------          -----------------        --------
                //    BLT.UN(a,b)     ucomis[s|d] a, b         Jb branches if CF=1, which means either a<b or unordered from the above table.
                //                    jb
                //
                //    BLT(a,b)        ucomis[s|d] b, a         Ja branches if CF=0 and ZF=0, which means b>a that in turn implies a<b
                //                    ja
                //
                //    BGT.UN(a,b)     ucomis[s|d] b, a         branch if b<a or unordered ==> branch if a>b or unordered
                //                    jb
                //
                //    BGT(a, b)       ucomis[s|d] a, b         branch if a>b
                //                    ja
                //
                //    BLE.UN(a,b)     ucomis[s|d] a, b         jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
                //                    jbe
                //
                //    BLE(a,b)        ucomis[s|d] b, a         jae branches if CF=0, which means b>=a or a<=b
                //                    jae
                //
                //    BGE.UN(a,b)     ucomis[s|d] b, a         branch if b<=a or unordered ==> branch if a>=b or unordered
                //                    jbe
                //
                //    BGE(a,b)        ucomis[s|d] a, b         branch if a>=b
                //                    jae
                //
                //    BEQ.UN(a,b)     ucomis[s|d] a, b         branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
                //                    je                       This case is given for completeness, in case if JIT generates such
                //                                             a gentree internally.
                //
                //    BEQ(a,b)        ucomis[s|d] a, b         From the above table, PF=0 and ZF=1 corresponds to a==b.
                //                    jpe L1
                //                    je
                //                 L1:
                //
                //    BNE(a,b)        ucomis[s|d] a, b         branch if a!=b. There is no BNE opcode in ECMA spec. This case is
                //                    jne                      given for completeness, in case if JIT generates such a gentree
                //                                             internally.
                //
                //    BNE.UN(a,b)     ucomis[s|d] a, b         From the above table, PF=1 or ZF=0 implies unordered or a!=b
                //                    jpe
                //                    jne
                //
                // As we can see from the above equalities, the operands of a compare operator need to be
                // reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE and BGE.UN/CGE.UN.
                bool reverseOps;
                if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
                {
                    // Unordered comparison case
                    reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
                }
                else
                {
                    reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
                }

                if (reverseOps)
                {
                    GenTreePtr tmp = op1;
                    op1 = op2;
                    op2 = tmp;
                }

                ins = ins_FloatCompare(op1Type);
                cmpAttr = emitTypeSize(op1Type);
            }
            else // not varTypeIsFloating(op1Type)
            {
                assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
                assert(!varTypeIsFloating(op2Type));

                // By default we use an int32 sized cmp instruction
                //
                ins = INS_cmp;
                var_types cmpType = TYP_INT;

                // In the if/then/else statement below we may change the
                // 'cmpType' and/or 'ins' to generate a smaller instruction

                // Are we comparing two values that are the same size?
// if (genTypeSize(op1Type) == genTypeSize(op2Type)) { if (op1Type == op2Type) { // If both types are exactly the same we can use that type cmpType = op1Type; } else if (genTypeSize(op1Type) == 8) { // If we have two different int64 types we need to use a long compare cmpType = TYP_LONG; } cmpAttr = emitTypeSize(cmpType); } else // Here we know that (op1Type != op2Type) { // Do we have a short compare against a constant in op2? // // We checked for this case in LowerCmp() and if we can perform a small // compare immediate we labeled this compare with a GTF_RELOP_SMALL // and for unsigned small non-equality compares the GTF_UNSIGNED flag. // if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0)) { assert(varTypeIsSmall(op1Type)); cmpType = op1Type; } else // compare two different sized operands { // For this case we don't want any memory operands, only registers or immediates // assert(!op1->isContainedMemoryOp()); assert(!op2->isContainedMemoryOp()); // Check for the case where one operand is an int64 type // Lower should have placed 32-bit operand in a register // for signed comparisons we will sign extend the 32-bit value in place. // bool op1Is64Bit = (genTypeSize(op1Type) == 8); bool op2Is64Bit = (genTypeSize(op2Type) == 8); if (op1Is64Bit) { cmpType = TYP_LONG; if (!(treeNode->gtFlags & GTF_UNSIGNED) && !op2Is64Bit) { assert(op2->gtRegNum != REG_NA); #ifdef _TARGET_X86_ NYI_X86("64 bit sign extensions for x86/RyuJIT"); #else // !_TARGET_X86_ inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type); #endif // !_TARGET_X86_ } } else if (op2Is64Bit) { cmpType = TYP_LONG; if (!(treeNode->gtFlags & GTF_UNSIGNED) && !op1Is64Bit) { assert(op1->gtRegNum != REG_NA); #ifdef _TARGET_X86_ NYI_X86("64 bit sign extensions for x86/RyuJIT"); #else // !_TARGET_X86_ inst_RV_RV(INS_movsxd, op1->gtRegNum, op1->gtRegNum, op1Type); #endif // !_TARGET_X86_ } } } cmpAttr = emitTypeSize(cmpType); } // See if we can generate a "test" instruction instead of a "cmp". // For this to generate the correct conditional branch we must have // a compare against zero. // if (op2->IsZero()) { if (op1->isContained()) { // op1 can be a contained memory op // or the special contained GT_AND that we created in Lowering::LowerCmp() // if ((op1->OperGet() == GT_AND)) { noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed()); ins = INS_test; // we will generate "test andOp1, andOp2CnsVal" op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1 op1 = op1->gtOp.gtOp1; // overwrite op1 // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); } } else // op1 is not contained thus it must be in a register { ins = INS_test; op2 = op1; // we will generate "test reg1,reg1" // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2); } } } emit->emitInsBinary(ins, cmpAttr, op1, op2); // Are we evaluating this into a register? if (targetReg != REG_NA) { genSetRegToCond(targetReg, tree); genProduceReg(tree); } } break; case GT_JTRUE: { GenTree *cmp = treeNode->gtOp.gtOp1; assert(cmp->OperIsCompare()); assert(compiler->compCurBB->bbJumpKind == BBJ_COND); // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp // is governed by a flag NOT by the inherent type of the node // TODO-XArch-CQ: Check if we can use the currently set flags. 
emitJumpKind jumpKind[2]; bool branchToTrueLabel[2]; genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel); BasicBlock* skipLabel = nullptr; if (jumpKind[0] != EJ_NONE) { BasicBlock *jmpTarget; if (branchToTrueLabel[0]) { jmpTarget = compiler->compCurBB->bbJumpDest; } else { // This case arises only for ordered GT_EQ right now assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0)); skipLabel = genCreateTempLabel(); jmpTarget = skipLabel; } inst_JMP(jumpKind[0], jmpTarget); } if (jumpKind[1] != EJ_NONE) { // the second conditional branch always has to be to the true label assert(branchToTrueLabel[1]); inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest); } if (skipLabel != nullptr) genDefineTempLabel(skipLabel); } break; case GT_RETURNTRAP: { // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC // based on the contents of 'data' GenTree *data = treeNode->gtOp.gtOp1; genConsumeRegs(data); GenTreeIntCon cns = intForm(TYP_INT, 0); emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns); BasicBlock* skipLabel = genCreateTempLabel(); inst_JMP(genJumpKindForOper(GT_EQ, true), skipLabel); // emit the call to the EE-helper that stops for GC (or other reasons) genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN); genDefineTempLabel(skipLabel); } break; case GT_STOREIND: { #ifdef FEATURE_SIMD // Storing Vector3 of size 12 bytes through indirection if (treeNode->TypeGet() == TYP_SIMD12) { genStoreIndTypeSIMD12(treeNode); break; } #endif //FEATURE_SIMD GenTree* data = treeNode->gtOp.gtOp2; GenTree* addr = treeNode->gtOp.gtOp1; assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet())); GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data); if (writeBarrierForm != GCInfo::WBF_NoBarrier) { // data and addr must be in registers. // Consume both registers so that any copies of interfering registers are taken care of. genConsumeOperands(treeNode->AsOp()); // At this point, we should not have any interference. // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go. noway_assert(data->gtRegNum != REG_ARG_0); // addr goes in REG_ARG_0 if (addr->gtRegNum != REG_ARG_0) { inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet()); } // data goes in REG_ARG_1 if (data->gtRegNum != REG_ARG_1) { inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet()); } genGCWriteBarrier(treeNode, writeBarrierForm); } else { bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0); bool dataIsUnary = false; GenTree* nonRMWsrc = nullptr; // We must consume the operands in the proper execution order, so that liveness is // updated appropriately. 
if (!reverseOps) { genConsumeAddress(addr); } if (data->isContained() && !data->OperIsLeaf()) { dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0); if (!dataIsUnary) { nonRMWsrc = data->gtGetOp1(); if (nonRMWsrc->isIndir() && Lowering::IndirsAreEquivalent(nonRMWsrc, treeNode)) { nonRMWsrc = data->gtGetOp2(); } genConsumeRegs(nonRMWsrc); } } else { genConsumeRegs(data); } if (reverseOps) { genConsumeAddress(addr); } if (data->isContained() && !data->OperIsLeaf()) { if (dataIsUnary) { emit->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(treeNode), treeNode); } else { if (data->OperGet() == GT_LSH || data->OperGet() == GT_RSH || data->OperGet() == GT_RSZ) { genCodeForShift(addr, data->gtOp.gtOp2, data); } else { emit->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(treeNode), treeNode, nonRMWsrc); } } } else { emit->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(treeNode), treeNode); } } } break; case GT_COPY: // This is handled at the time we call genConsumeReg() on the GT_COPY break; case GT_SWAP: { // Swap is only supported for lclVar operands that are enregistered // We do not consume or produce any registers. Both operands remain enregistered. // However, the gc-ness may change. assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2)); GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon(); LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]); var_types type1 = varDsc1->TypeGet(); GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon(); LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]); var_types type2 = varDsc2->TypeGet(); // We must have both int or both fp regs assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2)); // FP swap is not yet implemented (and should have NYI'd in LSRA) assert(!varTypeIsFloating(type1)); regNumber oldOp1Reg = lcl1->gtRegNum; regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg); regNumber oldOp2Reg = lcl2->gtRegNum; regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg); // We don't call genUpdateVarReg because we don't have a tree node with the new register. varDsc1->lvRegNum = oldOp2Reg; varDsc2->lvRegNum = oldOp1Reg; // Do the xchg emitAttr size = EA_PTRSIZE; if (varTypeGCtype(type1) != varTypeGCtype(type2)) { // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers. // Otherwise it will leave them alone, which is correct if they have the same GC-ness. size = EA_GCREF; } inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size); // Update the gcInfo. // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output) gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask); gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask); // gcMarkRegPtrVal will do the appropriate thing for non-gc types. // It will also dump the updates. gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1); gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2); } break; case GT_LIST: case GT_ARGPLACE: // Nothing to do break; case GT_PUTARG_STK: #ifdef _TARGET_X86_ genPutArgStk(treeNode); #else // !_TARGET_X86_ { noway_assert(targetType != TYP_STRUCT); assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); // Get argument offset on stack. // Here we cross check that argument offset hasn't changed from lowering to codegen since // we are storing arg slot number in GT_PUTARG_STK node in lowering phase. 
int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE; #ifdef DEBUG fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode); assert(curArgTabEntry); assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE); #endif GenTreePtr data = treeNode->gtOp.gtOp1; unsigned varNum; #if FEATURE_FASTTAILCALL bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea; #else const bool putInIncomingArgArea = false; #endif // Whether to setup stk arg in incoming or out-going arg area? // Fast tail calls implemented as epilog+jmp = stk arg is setup in incoming arg area. // All other calls - stk arg is setup in out-going arg area. if (putInIncomingArgArea) { // The first varNum is guaranteed to be the first incoming arg of the method being compiled. // See lvaInitTypeRef() for the order in which lvaTable entries are initialized. varNum = 0; #ifdef DEBUG // This must be a fast tail call. assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall()); // Since it is a fast tail call, the existence of first incoming arg is guaranteed // because fast tail call requires that in-coming arg area of caller is >= out-going // arg area required for tail call. LclVarDsc* varDsc = compiler->lvaTable; assert(varDsc != nullptr); assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0))); #endif } else { #if FEATURE_FIXED_OUT_ARGS varNum = compiler->lvaOutgoingArgSpaceVar; #else // !FEATURE_FIXED_OUT_ARGS NYI_X86("Stack args for x86/RyuJIT"); varNum = BAD_VAR_NUM; #endif // !FEATURE_FIXED_OUT_ARGS } if (data->isContained()) { getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), varNum, argOffset, (int) data->AsIntConCommon()->IconValue()); } else { genConsumeReg(data); getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, varNum, argOffset); } } #endif // !_TARGET_X86_ break; case GT_PUTARG_REG: { noway_assert(targetType != TYP_STRUCT); // commas show up here commonly, as part of a nullchk operation GenTree *op1 = treeNode->gtOp.gtOp1; // If child node is not already in the register we need, move it genConsumeReg(op1); if (treeNode->gtRegNum != op1->gtRegNum) { inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); } } genProduceReg(treeNode); break; case GT_CALL: genCallInstruction(treeNode); break; case GT_JMP: genJmpMethod(treeNode); break; case GT_LOCKADD: case GT_XCHG: case GT_XADD: genLockedInstructions(treeNode); break; case GT_MEMORYBARRIER: instGen_MemoryBarrier(); break; case GT_CMPXCHG: { GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1 GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2 GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3 assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX); assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX); genConsumeReg(location); genConsumeReg(value); genConsumeReg(comparand); // comparand goes to RAX; // Note that we must issue this move after the genConsumeRegs(), in case any of the above // have a GT_COPY from RAX. 
if (comparand->gtRegNum != REG_RAX) { inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet()); } // location is Rm instGen(INS_lock); emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0); // Result is in RAX if (targetReg != REG_RAX) { inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType); } } genProduceReg(treeNode); break; case GT_RELOAD: // do nothing - reload is just a marker. // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child // into the register specified in this node. break; case GT_NOP: break; case GT_NO_OP: if (treeNode->gtFlags & GTF_NO_OP_NO) { noway_assert(!"GTF_NO_OP_NO should not be set"); } else { getEmitter()->emitIns_Nop(1); } break; case GT_ARR_BOUNDS_CHECK: #ifdef FEATURE_SIMD case GT_SIMD_CHK: #endif // FEATURE_SIMD genRangeCheck(treeNode); break; case GT_PHYSREG: if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg) { inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType); genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg); } genProduceReg(treeNode); break; case GT_PHYSREGDST: break; case GT_NULLCHECK: { assert(!treeNode->gtOp.gtOp1->isContained()); regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1); emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0); } break; case GT_CATCH_ARG: noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp)); /* Catch arguments get passed in a register. genCodeForBBlist() would have marked it as holding a GC object, but not used. */ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT); genConsumeReg(treeNode); break; #if !FEATURE_EH_FUNCLETS case GT_END_LFIN: NYI_X86("GT_END_LFIN codegen"); #endif case GT_PINVOKE_PROLOG: noway_assert(((gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0); // the runtime side requires the codegen here to be consistent emit->emitDisableRandomNops(); break; case GT_LABEL: genPendingCallLabel = genCreateTempLabel(); treeNode->gtLabel.gtLabBB = genPendingCallLabel; emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum); break; case GT_COPYOBJ: genCodeForCpObj(treeNode->AsCpObj()); break; case GT_COPYBLK: { GenTreeCpBlk* cpBlkOp = treeNode->AsCpBlk(); if (cpBlkOp->gtBlkOpGcUnsafe) { getEmitter()->emitDisableGC(); } switch (cpBlkOp->gtBlkOpKind) { #ifdef _TARGET_AMD64_ case GenTreeBlkOp::BlkOpKindHelper: genCodeForCpBlk(cpBlkOp); break; #endif // _TARGET_AMD64_ case GenTreeBlkOp::BlkOpKindRepInstr: genCodeForCpBlkRepMovs(cpBlkOp); break; case GenTreeBlkOp::BlkOpKindUnroll: genCodeForCpBlkUnroll(cpBlkOp); break; default: unreached(); } if (cpBlkOp->gtBlkOpGcUnsafe) { getEmitter()->emitEnableGC(); } } break; case GT_INITBLK: { GenTreeInitBlk* initBlkOp = treeNode->AsInitBlk(); switch (initBlkOp->gtBlkOpKind) { case GenTreeBlkOp::BlkOpKindHelper: genCodeForInitBlk(initBlkOp); break; case GenTreeBlkOp::BlkOpKindRepInstr: genCodeForInitBlkRepStos(initBlkOp); break; case GenTreeBlkOp::BlkOpKindUnroll: genCodeForInitBlkUnroll(initBlkOp); break; default: unreached(); } } break; case GT_JMPTABLE: genJumpTable(treeNode); break; case GT_SWITCH_TABLE: genTableBasedSwitch(treeNode); break; case GT_ARR_INDEX: genCodeForArrIndex(treeNode->AsArrIndex()); break; case GT_ARR_OFFSET: genCodeForArrOffset(treeNode->AsArrOffs()); break; case GT_CLS_VAR_ADDR: getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0); break; default: { 
#ifdef DEBUG char message[256]; sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet())); #endif assert(!"Unknown node in codegen"); } break; } } // Generate code for division (or mod) by power of two // or negative powers of two. (meaning -1 * a power of two, not 2^(-1)) // Op2 must be a contained integer constant. void CodeGen::genCodeForPow2Div(GenTreeOp* tree) { GenTree *dividend = tree->gtOp.gtOp1; GenTree *divisor = tree->gtOp.gtOp2; genTreeOps oper = tree->OperGet(); emitAttr size = emitTypeSize(tree); emitter *emit = getEmitter(); regNumber targetReg = tree->gtRegNum; var_types targetType = tree->TypeGet(); bool isSigned = oper == GT_MOD || oper == GT_DIV; // precondition: extended dividend is in RDX:RAX // which means it is either all zeros or all ones noway_assert(divisor->isContained()); GenTreeIntConCommon* divImm = divisor->AsIntConCommon(); ssize_t imm = divImm->IconValue(); ssize_t abs_imm = abs(imm); noway_assert(isPow2(abs_imm)); if (isSigned) { if (imm == 1) { if (oper == GT_DIV) { if (targetReg != REG_RAX) inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); } else { assert(oper == GT_MOD); instGen_Set_Reg_To_Zero(size, targetReg); } return; } if (abs_imm == 2) { if (oper == GT_MOD) { emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1 // xor with rdx will flip all bits if negative emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0 } else { assert(oper == GT_DIV); // add 1 if it's negative emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); } } else { // add imm-1 if negative emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1); emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX); } if (oper == GT_DIV) { unsigned shiftAmount = genLog2(unsigned(abs_imm)); inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount); if (imm < 0) { emit->emitIns_R(INS_neg, size, REG_RAX); } } else { assert(oper == GT_MOD); if (abs_imm > 2) { emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1); } // RDX contains 'imm-1' if negative emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX); } if (targetReg != REG_RAX) { inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType); } } else { assert (imm > 0); if (targetReg != dividend->gtRegNum) { inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType); } if (oper == GT_UDIV) { inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm))); } else { assert(oper == GT_UMOD); emit->emitIns_R_I(INS_and, size, targetReg, imm -1); } } } /*********************************************************************************************** * Generate code for localloc */ void CodeGen::genLclHeap(GenTreePtr tree) { NYI_X86("Localloc"); assert(tree->OperGet() == GT_LCLHEAP); GenTreePtr size = tree->gtOp.gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); regNumber targetReg = tree->gtRegNum; regMaskTP tmpRegsMask = tree->gtRsvdRegs; regNumber regCnt = REG_NA; regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; #ifdef DEBUG // Verify ESP if (compiler->opts.compStackCheckOnRet) { noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); BasicBlock * esp_check = genCreateTempLabel(); inst_JMP(genJumpKindForOper(GT_EQ, true), esp_check); 
getEmitter()->emitIns(INS_BREAKPOINT); genDefineTempLabel(esp_check); } #endif noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack // Whether method has PSPSym. bool hasPspSym; unsigned stackAdjustment = 0; BasicBlock* loop = NULL; #if FEATURE_EH_FUNCLETS hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM); #else hasPspSym = false; #endif // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI()) { // If size is a constant, then it must be contained. assert(size->isContained()); // If amount is zero then return null in targetReg amount = size->gtIntCon.gtIconVal; if (amount == 0) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg); goto BAILOUT; } // 'amount' is the total numbe of bytes to localloc to properly STACK_ALIGN amount = AlignUp(amount, STACK_ALIGN); } else { // If 0 bail out by returning null in targetReg genConsumeRegAndCopy(size, targetReg); endLabel = genCreateTempLabel(); getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg); inst_JMP(EJ_je, endLabel); // Compute the size of the block to allocate and perform alignment. // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. if (!hasPspSym && compiler->info.compInitMem) { assert(genCountBits(tmpRegsMask) == 0); regCnt = targetReg; } else { assert(genCountBits(tmpRegsMask) >= 1); regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); tmpRegsMask &= ~regCntMask; regCnt = genRegNumFromMask(regCntMask); if (regCnt != targetReg) inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet()); } // Align to STACK_ALIGN // regCnt will be the total number of bytes to localloc inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type)); inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type)); } #if FEATURE_EH_FUNCLETS // If we have PSPsym, then need to re-locate it after localloc. if (hasPspSym) { stackAdjustment += STACK_ALIGN; // Save a copy of PSPSym assert(genCountBits(tmpRegsMask) >= 1); regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask); tmpRegsMask &= ~pspSymRegMask; pspSymReg = genRegNumFromMask(pspSymRegMask); getEmitter()->emitIns_R_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif #if FEATURE_FIXED_OUT_ARGS // If we have an outgoing arg area then we must adjust the SP by popping off the // outgoing arg area. We will restore it right before we return from this method. // // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following // are the cases that needs to be handled: // i) Method has PSPSym + out-going arg area. // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs). // Therefore, we will pop-off RSP upto out-going arg area before locallocating. // We need to add padding to ensure RSP is STACK_ALIGN'ed while re-locating PSPSym + arg area. // ii) Method has no PSPSym but out-going arg area. // Almost same case as above without the requirement to pad for the final RSP to be STACK_ALIGN'ed. // iii) Method has PSPSym but no out-going arg area. // Nothing to pop-off from the stack but needs to relocate PSPSym with SP padded. // iv) Method has neither PSPSym nor out-going arg area. // Nothing needs to popped off from stack nor relocated. 
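// A worked example for case i), with illustrative numbers only: if the out-going arg area is
// 32 bytes and the method has a PSPSym, we first add 32 to RSP to pop the arg area, and
// stackAdjustment ends up as 32 + STACK_ALIGN; once the allocation is done that amount is
// subtracted from RSP again so the PSPSym and the arg area sit below the newly allocated block.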
if (compiler->lvaOutgoingArgSpaceSize > 0) { assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE); stackAdjustment += compiler->lvaOutgoingArgSpaceSize; } #endif if (size->IsCnsIntOrI()) { // We should reach here only for non-zero, constant size allocations. assert(amount > 0); // For small allocations we will generate up to six push 0 inline size_t cntPtrSizedWords = (amount >> STACK_ALIGN_SHIFT); if (cntPtrSizedWords <= 6) { while (cntPtrSizedWords != 0) { // push_hide means don't track the stack inst_IV(INS_push_hide, 0); cntPtrSizedWords--; } goto ALLOC_DONE; } else if (!compiler->info.compInitMem && (amount < CORINFO_PAGE_SIZE)) // must be < not <= { // Since the size is a page or less, simply adjust ESP // ESP might already be in the guard page, must touch it BEFORE // the alloc, not after. getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE); goto ALLOC_DONE; } // else, "mov regCnt, amount" // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); if (!hasPspSym && compiler->info.compInitMem) { assert(genCountBits(tmpRegsMask) == 0); regCnt = targetReg; } else { assert(genCountBits(tmpRegsMask) >= 1); regMaskTP regCntMask = genFindLowestBit(tmpRegsMask); tmpRegsMask &= ~regCntMask; regCnt = genRegNumFromMask(regCntMask); } genSetRegToIcon(regCnt, amount, ((int)amount == amount)? TYP_INT : TYP_LONG); } loop = genCreateTempLabel(); if (compiler->info.compInitMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that RSP is always valid // by tickling the pages, we will just push 0's on the stack. // // Note: regCnt is guaranteed to be even on Amd64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2 // and localloc size is a multiple of STACK_ALIGN. // Loop: genDefineTempLabel(loop); // dec is a 2 byte instruction, but sub is 4 (could be 3 if // we know size is TYP_INT instead of TYP_I_IMPL) // Also we know that we can only push 8 bytes at a time, but // alignment is 16 bytes, so we can push twice and do a sub // for just a little bit of loop unrolling inst_IV(INS_push_hide, 0); // --- push 0 inst_IV(INS_push_hide, 0); // --- push 0 // If not done, loop // Note that regCnt is the number of bytes to stack allocate. // Therefore we need to subtract 16 from regcnt here. assert(genIsValidIntReg(regCnt)); inst_RV_IV(INS_sub, regCnt, 16, emitActualTypeSize(type)); inst_JMP(EJ_jne, loop); } else { //At this point 'regCnt' is set to the total number of bytes to locAlloc. // //We don't need to zero out the allocated memory. However, we do have //to tickle the pages to ensure that ESP is always valid and is //in sync with the "stack guard page". Note that in the worst //case ESP is on the last byte of the guard page. Thus you must //touch ESP+0 first not ESP+x01000. 
// //Another subtlety is that you don't want ESP to be exactly on the //boundary of the guard page because PUSH is predecrement, thus //call setup would not touch the guard page but just beyond it // //Note that we go through a few hoops so that ESP never points to //illegal pages at any time during the ticking process // // neg REGCNT // add REGCNT, ESP // reg now holds ultimate ESP // jb loop // result is smaller than orignial ESP (no wrap around) // xor REGCNT, REGCNT, // Overflow, pick lowest possible number // loop: // test ESP, [ESP+0] // tickle the page // mov REGTMP, ESP // sub REGTMP, PAGE_SIZE // mov ESP, REGTMP // cmp ESP, REGCNT // jae loop // // mov ESP, REG // end: inst_RV(INS_NEG, regCnt, TYP_I_IMPL); inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL); inst_JMP(EJ_jb, loop); instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); genDefineTempLabel(loop); // Tickle the decremented value, and move back to ESP, // note that it has to be done BEFORE the update of ESP since // ESP might already be on the guard page. It is OK to leave // the final value of ESP on the guard page getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); // This is a harmless trick to avoid the emitter trying to track the // decrement of the ESP - we do the subtraction in another reg instead // of adjusting ESP directly. assert(tmpRegsMask != RBM_NONE); assert(genCountBits(tmpRegsMask) == 1); regNumber regTmp = genRegNumFromMask(tmpRegsMask); inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); inst_RV_IV(INS_sub, regTmp, CORINFO_PAGE_SIZE, EA_PTRSIZE); inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL); inst_JMP(EJ_jae, loop); // Move the final value to ESP inst_RV_RV(INS_mov, REG_SPBASE, regCnt); } ALLOC_DONE: // Re-adjust SP to allocate PSPSym and out-going arg area if (stackAdjustment > 0) { assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE); #if FEATURE_EH_FUNCLETS // Write PSPSym to its new location. if (hasPspSym) { assert(genIsValidIntReg(pspSymReg)); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0); } #endif } // Return the stackalloc'ed address in result register. // TargetReg = RSP + stackAdjustment. getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment); BAILOUT: if (endLabel != nullptr) genDefineTempLabel(endLabel); // Write the lvaShadowSPfirst stack frame slot noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0); #if STACK_PROBES if (compiler->opts.compNeedStackProbes) { genGenerateStackProbe(); } #endif #ifdef DEBUG // Update new ESP if (compiler->opts.compStackCheckOnRet) { noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame); getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0); } #endif genProduceReg(tree); } // Generate code for InitBlk using rep stos. // Preconditions: // The size of the buffers must be a constant and also less than INITBLK_STOS_LIMIT bytes. // Any value larger than that, we'll use the helper even if both the // fill byte and the size are integer constants. 
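//
// As a rough sketch, the sequence produced here is (the register-to-register moves are
// omitted when an operand already sits in the required register):
//      mov rcx, <byte count>
//      mov rax, <fill value>
//      mov rdi, <destination address>
//      rep stosb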
void CodeGen::genCodeForInitBlkRepStos(GenTreeInitBlk* initBlkNode) { // Make sure we got the arguments of the initblk/initobj operation in the right registers GenTreePtr blockSize = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Dest(); GenTreePtr initVal = initBlkNode->InitVal(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!initVal->isContained()); assert(!blockSize->isContained()); assert(blockSize->gtSkipReloadOrCopy()->IsCnsIntOrI()); size_t size = blockSize->gtIntCon.gtIconVal; if (initVal->IsCnsIntOrI()) { assert(size > INITBLK_UNROLL_LIMIT && size < INITBLK_STOS_LIMIT); } #endif // DEBUG genConsumeRegAndCopy(blockSize, REG_RCX); genConsumeRegAndCopy(initVal, REG_RAX); genConsumeRegAndCopy(dstAddr, REG_RDI); instGen(INS_r_stosb); } // Generate code for InitBlk by performing a loop unroll // Preconditions: // a) Both the size and fill byte value are integer constants. // b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes. // void CodeGen::genCodeForInitBlkUnroll(GenTreeInitBlk* initBlkNode) { // Make sure we got the arguments of the initblk/initobj operation in the right registers GenTreePtr blockSize = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Dest(); GenTreePtr initVal = initBlkNode->InitVal(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!initVal->isContained()); assert(blockSize->isContained()); assert(blockSize->IsCnsIntOrI()); #endif // DEBUG size_t size = blockSize->gtIntCon.gtIconVal; assert(size <= INITBLK_UNROLL_LIMIT); assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI()); emitter *emit = getEmitter(); genConsumeReg(initVal); genConsumeReg(dstAddr); // If the initVal was moved, or spilled and reloaded to a different register, // get the original initVal from below the GT_RELOAD, but only after capturing the valReg, // which needs to be the new register. regNumber valReg = initVal->gtRegNum; initVal = initVal->gtSkipReloadOrCopy(); unsigned offset = 0; // Perform an unroll using SSE2 loads and stores. if (size >= XMM_REGSIZE_BYTES) { regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs); #ifdef DEBUG assert(initBlkNode->gtRsvdRegs != RBM_NONE); assert(genCountBits(initBlkNode->gtRsvdRegs) == 1); assert(genIsValidFloatReg(tmpReg)); #endif // DEBUG if (initVal->gtIntCon.gtIconVal != 0) { emit->emitIns_R_R(INS_mov_i2xmm, EA_8BYTE, tmpReg, valReg); emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg); } else { emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg); } // Determine how many 16 byte slots we're going to fill using SSE movs. size_t slots = size / XMM_REGSIZE_BYTES; while (slots-- > 0) { emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset); offset += XMM_REGSIZE_BYTES; } } // Fill the remainder (or a < 16 byte sized struct) if ((size & 8) != 0) { #ifdef _TARGET_X86_ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs. 
emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); offset += 4; emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); offset += 4; #else // !_TARGET_X86_ emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset); offset += 8; #endif // !_TARGET_X86_ } if ((size & 4) != 0) { emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset); offset += 4; } if ((size & 2) != 0) { emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset); offset += 2; } if ((size & 1) != 0) { emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset); } } // Generates code for InitBlk by calling the VM memset helper function. // Preconditions: // a) The size argument of the InitBlk is not an integer constant. // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes. void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode) { #ifdef _TARGET_AMD64_ // Make sure we got the arguments of the initblk operation in the right registers GenTreePtr blockSize = initBlkNode->Size(); GenTreePtr dstAddr = initBlkNode->Dest(); GenTreePtr initVal = initBlkNode->InitVal(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!initVal->isContained()); assert(!blockSize->isContained()); if (blockSize->IsCnsIntOrI()) { assert(blockSize->gtIntCon.gtIconVal >= INITBLK_STOS_LIMIT); } #endif // DEBUG genConsumeRegAndCopy(blockSize, REG_ARG_2); genConsumeRegAndCopy(initVal, REG_ARG_1); genConsumeRegAndCopy(dstAddr, REG_ARG_0); genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); #else // !_TARGET_AMD64_ NYI_X86("Helper call for InitBlk"); #endif // !_TARGET_AMD64_ } // Generate code for a load from some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset) { emitter *emit = getEmitter(); if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset); } else { emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset); } } // Generate code for a store to some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset) { emitter *emit = getEmitter(); if (base->OperIsLocalAddr()) { if (base->gtOper == GT_LCL_FLD_ADDR) offset += base->gtLclFld.gtLclOffs; emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset); } else { emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset); } } // Generates CpBlk code by performing a loop unroll // Preconditions: // The size argument of the CpBlk node is a constant and <= 64 bytes. // This may seem small but covers >95% of the cases in several framework assemblies. 
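// As an illustration (the registers are whatever LSRA reserved, so this is only a sketch of
// the shape of the output), a 24-byte copy on AMD64 unrolls to roughly:
//      movdqu xmm0, [srcReg]          ; one 16-byte SSE2 chunk
//      movdqu [dstReg], xmm0
//      mov    rax, [srcReg + 16]      ; the 8-byte remainder via an integer temp
//      mov    [dstReg + 16], rax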
// void CodeGen::genCodeForCpBlkUnroll(GenTreeCpBlk* cpBlkNode) { // Make sure we got the arguments of the cpblk operation in the right registers GenTreePtr blockSize = cpBlkNode->Size(); GenTreePtr dstAddr = cpBlkNode->Dest(); GenTreePtr srcAddr = cpBlkNode->Source(); assert(blockSize->IsCnsIntOrI()); size_t size = blockSize->gtIntCon.gtIconVal; assert(size <= CPBLK_UNROLL_LIMIT); emitter *emit = getEmitter(); if (!srcAddr->isContained()) genConsumeReg(srcAddr); if (!dstAddr->isContained()) genConsumeReg(dstAddr); unsigned offset = 0; // If the size of this struct is larger than 16 bytes // let's use SSE2 to be able to do 16 byte at a time // loads and stores. if (size >= XMM_REGSIZE_BYTES) { assert(cpBlkNode->gtRsvdRegs != RBM_NONE); regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT); assert(genIsValidFloatReg(xmmReg)); size_t slots = size / XMM_REGSIZE_BYTES; while (slots-- > 0) { // Load genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset); // Store genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset); offset += XMM_REGSIZE_BYTES; } } // Fill the remainder (15 bytes or less) if there's one. if ((size & 0xf) != 0) { // Grab the integer temp register to emit the remaining loads and stores. regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT); if ((size & 8) != 0) { #ifdef _TARGET_X86_ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs. for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4) { genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset); genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); } #else // !_TARGET_X86_ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset); genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset); offset += 8; #endif // !_TARGET_X86_ } if ((size & 4) != 0) { genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset); genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset); offset += 4; } if ((size & 2) != 0) { genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset); genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset); offset += 2; } if ((size & 1) != 0) { genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset); genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset); } } } // Generate code for CpBlk by using rep movs // Preconditions: // The size argument of the CpBlk is a constant and is between // CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes. void CodeGen::genCodeForCpBlkRepMovs(GenTreeCpBlk* cpBlkNode) { // Make sure we got the arguments of the cpblk operation in the right registers GenTreePtr blockSize = cpBlkNode->Size(); GenTreePtr dstAddr = cpBlkNode->Dest(); GenTreePtr srcAddr = cpBlkNode->Source(); #ifdef DEBUG assert(!dstAddr->isContained()); assert(!srcAddr->isContained()); assert(!blockSize->isContained()); assert(blockSize->IsCnsIntOrI()); size_t size = blockSize->gtIntCon.gtIconVal; #ifdef _TARGET_X64_ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT); #else assert(size > CPBLK_UNROLL_LIMIT); #endif #endif // DEBUG genConsumeRegAndCopy(blockSize, REG_RCX); genConsumeRegAndCopy(srcAddr, REG_RSI); genConsumeRegAndCopy(dstAddr, REG_RDI); instGen(INS_r_movsb); } // Generate code for CpObj nodes wich copy structs that have interleaved // GC pointers. // This will generate a sequence of movsq instructions for the cases of non-gc members // and calls to the BY_REF_ASSIGN helper otherwise. 
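// For instance, copying a hypothetical four-slot struct laid out as
// { object, long, long, object } comes out approximately as:
//      call CORINFO_HELP_ASSIGN_BYREF   ; gc slot (the helper advances RSI/RDI)
//      movsq                            ; non-gc slot
//      movsq                            ; non-gc slot
//      call CORINFO_HELP_ASSIGN_BYREF   ; gc slot
// Non-gc runs of CPOBJ_NONGC_SLOTS_LIMIT or more slots are emitted as rep movsq instead.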
void CodeGen::genCodeForCpObj(GenTreeCpObj* cpObjNode) { // Make sure we got the arguments of the cpobj operation in the right registers GenTreePtr clsTok = cpObjNode->ClsTok(); GenTreePtr dstAddr = cpObjNode->Dest(); GenTreePtr srcAddr = cpObjNode->Source(); bool dstOnStack = dstAddr->OperIsLocalAddr(); #ifdef DEBUG bool isRepMovsqUsed = false; assert(!dstAddr->isContained()); assert(!srcAddr->isContained()); // If the GenTree node has data about GC pointers, this means we're dealing // with CpObj, so this requires special logic. assert(cpObjNode->gtGcPtrCount > 0); // MovSq instruction is used for copying non-gcref fields and it needs // src = RSI and dst = RDI. // Either these registers must not contain lclVars, or they must be dying or marked for spill. // This is because these registers are incremented as we go through the struct. if (srcAddr->gtRegNum == REG_RSI) { assert(!genIsRegCandidateLocal(srcAddr) || (srcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) != 0); } if (dstAddr->gtRegNum == REG_RDI) { assert(!genIsRegCandidateLocal(dstAddr) || (dstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) != 0); } #endif // DEBUG // Consume these registers. // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing"). genConsumeRegAndCopy(srcAddr, REG_RSI); gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet()); genConsumeRegAndCopy(dstAddr, REG_RDI); gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet()); unsigned slots = cpObjNode->gtSlots; // If we can prove it's on the stack we don't need to use the write barrier. if (dstOnStack) { if (slots >= CPOBJ_NONGC_SLOTS_LIMIT) { #ifdef DEBUG // If the destination of the CpObj is on the stack // make sure we allocated RCX to emit rep movsq. regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); assert(tmpReg == REG_RCX); isRepMovsqUsed = true; #endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots); instGen(INS_r_movsq); } else { // For small structs, it's better to emit a sequence of movsq than to // emit a rep movsq instruction. while (slots > 0) { instGen(INS_movsq); slots--; } } } else { BYTE* gcPtrs = cpObjNode->gtGcPtrs; unsigned gcPtrCount = cpObjNode->gtGcPtrCount; unsigned i = 0; while (i < slots) { switch (gcPtrs[i]) { case TYPE_GC_NONE: // Let's see if we can use rep movsq instead of a sequence of movsq instructions // to save cycles and code size. { unsigned nonGcSlotCount = 0; do { nonGcSlotCount++; i++; } while (i < slots && gcPtrs[i] == TYPE_GC_NONE); // If we have a very small contiguous non-gc region, it's better just to // emit a sequence of movsq instructions if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT) { while (nonGcSlotCount > 0) { instGen(INS_movsq); nonGcSlotCount--; } } else { #ifdef DEBUG // Otherwise, we can save code-size and improve CQ by emitting // rep movsq regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT); assert(tmpReg == REG_RCX); isRepMovsqUsed = true; #endif // DEBUG getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount); instGen(INS_r_movsq); } } break; default: // We have a GC pointer, call the memory barrier. genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); gcPtrCount--; i++; } } #ifdef DEBUG if (!isRepMovsqUsed) { assert(clsTok->isContained()); } assert(gcPtrCount == 0); #endif // DEBUG } // Clear the gcInfo for RSI and RDI. // While we normally update GC info prior to the last instruction that uses them, // these actually live into the helper call. 
gcInfo.gcMarkRegSetNpt(RBM_RSI);
gcInfo.gcMarkRegSetNpt(RBM_RDI);
}

// Generate code for a CpBlk node by means of the VM memcpy helper call
// Preconditions:
// a) The size argument of the CpBlk is not an integer constant
// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
{
#ifdef _TARGET_AMD64_
    // Make sure we got the arguments of the cpblk operation in the right registers
    GenTreePtr blockSize = cpBlkNode->Size();
    GenTreePtr dstAddr   = cpBlkNode->Dest();
    GenTreePtr srcAddr   = cpBlkNode->Source();

    assert(!dstAddr->isContained());
    assert(!srcAddr->isContained());
    assert(!blockSize->isContained());

#ifdef DEBUG
    if (blockSize->IsCnsIntOrI())
    {
        assert(blockSize->gtIntCon.gtIconVal >= CPBLK_MOVS_LIMIT);
    }
#endif // DEBUG

    genConsumeRegAndCopy(blockSize, REG_ARG_2);
    genConsumeRegAndCopy(srcAddr,   REG_ARG_1);
    genConsumeRegAndCopy(dstAddr,   REG_ARG_0);

    genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
#else // !_TARGET_AMD64_
    NYI_X86("Helper call for CpBlk");
#endif // !_TARGET_AMD64_
}

// Generate code to do a switch statement based on a table of ip-relative offsets
void CodeGen::genTableBasedSwitch(GenTree* treeNode)
{
    genConsumeOperands(treeNode->AsOp());
    regNumber idxReg  = treeNode->gtOp.gtOp1->gtRegNum;
    regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;

    regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);

    // load the ip-relative offset (which is relative to start of fgFirstBB)
    getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);

    // add it to the absolute address of fgFirstBB
    compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
    getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
    getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);

    // jmp baseReg
    getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
}

// emits the table and an instruction to get the address of the first element
void CodeGen::genJumpTable(GenTree* treeNode)
{
    noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
    assert(treeNode->OperGet() == GT_JMPTABLE);

    unsigned     jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
    BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
    unsigned     jmpTabOffs;
    unsigned     jmpTabBase;

    jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);

    jmpTabOffs = 0;

    JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);

    for (unsigned i = 0; i < jumpCount; i++)
    {
        BasicBlock* target = *jumpTable++;
        noway_assert(target->bbFlags & BBF_JMP_TARGET);

        JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);

        getEmitter()->emitDataGenData(i, target);
    };

    getEmitter()->emitDataGenEnd();

    // Access to inline data is 'abstracted' by a special type of static member
    // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
    // to constant data, not a real static field.
getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum, compiler->eeFindJitDataOffs(jmpTabBase), 0); genProduceReg(treeNode); } // generate code for the locked operations: // GT_LOCKADD, GT_XCHG, GT_XADD void CodeGen::genLockedInstructions(GenTree* treeNode) { GenTree* data = treeNode->gtOp.gtOp2; GenTree* addr = treeNode->gtOp.gtOp1; regNumber targetReg = treeNode->gtRegNum; regNumber dataReg = data->gtRegNum; regNumber addrReg = addr->gtRegNum; instruction ins; // all of these nodes implicitly do an indirection on op1 // so create a temporary node to feed into the pattern matching GenTreeIndir i = indirForm(data->TypeGet(), addr); genConsumeReg(addr); // The register allocator should have extended the lifetime of the address // so that it is not used as the target. noway_assert(addrReg != targetReg); // If data is a lclVar that's not a last use, we'd better have allocated a register // for the result (except in the case of GT_LOCKADD which does not produce a register result). assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0); genConsumeIfReg(data); if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg) { inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg); data->gtRegNum = targetReg; // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the // original gtRegNum on data, after calling emitInsBinary below. } switch (treeNode->OperGet()) { case GT_LOCKADD: instGen(INS_lock); ins = INS_add; break; case GT_XCHG: // lock is implied by xchg ins = INS_xchg; break; case GT_XADD: instGen(INS_lock); ins = INS_xadd; break; default: unreached(); } getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data); if (treeNode->gtRegNum != REG_NA) { genProduceReg(treeNode); } } // generate code for BoundsCheck nodes void CodeGen::genRangeCheck(GenTreePtr oper) { #ifdef FEATURE_SIMD noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK); #else // !FEATURE_SIMD noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK); #endif // !FEATURE_SIMD GenTreeBoundsChk* bndsChk = oper->AsBoundsChk(); GenTreePtr arrLen = bndsChk->gtArrLen; GenTreePtr arrIndex = bndsChk->gtIndex; GenTreePtr arrRef = NULL; int lenOffset = 0; GenTree *src1, *src2; emitJumpKind jmpKind; genConsumeRegs(arrLen); genConsumeRegs(arrIndex); if (arrIndex->isContainedIntOrIImmed()) { src1 = arrLen; src2 = arrIndex; jmpKind = EJ_jbe; } else { src1 = arrIndex; src2 = arrLen; jmpKind = EJ_jae; } #if DEBUG var_types bndsChkType = src2->TypeGet(); // Bounds checks can only be 32 or 64 bit sized comparisons. assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG); // The type of the bounds check should always wide enough to compare against the index. assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet())); #endif //DEBUG getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(src2->TypeGet()), src1, src2); genJumpToThrowHlpBlk(jmpKind, Compiler::ACK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB); } //------------------------------------------------------------------------ // genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the // lower bound for the given dimension. // // Arguments: // elemType - the element type of the array // rank - the rank of the array // dimension - the dimension for which the lower bound offset will be returned. // // Return Value: // The offset. 
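// Worked example (from the formula below): for a rank-2 array, the per-dimension data that
// follows eeGetArrayDataOffset() is laid out as [size0][size1][lowerBound0][lowerBound1],
// each a TYP_INT, so the lower bound of dimension 0 lives at offset
// eeGetArrayDataOffset(elemType) + 4 * (0 + 2), i.e. just past the two dimension sizes.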
unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension) { // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank); } //------------------------------------------------------------------------ // genOffsetOfMDArrayLength: Returns the offset from the Array object to the // size for the given dimension. // // Arguments: // elemType - the element type of the array // rank - the rank of the array // dimension - the dimension for which the lower bound offset will be returned. // // Return Value: // The offset. unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension) { // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets. return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension; } //------------------------------------------------------------------------ // genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference, // producing the effective index by subtracting the lower bound. // // Arguments: // arrIndex - the node for which we're generating code // // Return Value: // None. // void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) { GenTreePtr arrObj = arrIndex->ArrObj(); GenTreePtr indexNode = arrIndex->IndexExpr(); regNumber arrReg = genConsumeReg(arrObj); regNumber indexReg = genConsumeReg(indexNode); regNumber tgtReg = arrIndex->gtRegNum; unsigned dim = arrIndex->gtCurrDim; unsigned rank = arrIndex->gtArrRank; var_types elemType = arrIndex->gtArrElemType; noway_assert(tgtReg != REG_NA); // Subtract the lower bound for this dimension. // TODO-XArch-CQ: make this contained if it's an immediate that fits. if (tgtReg != indexReg) { inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet()); } getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg, genOffsetOfMDArrayLowerBound(elemType, rank, dim)); getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg, genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); genJumpToThrowHlpBlk(EJ_jae, Compiler::ACK_RNGCHK_FAIL); genProduceReg(arrIndex); } //------------------------------------------------------------------------ // genCodeForArrOffset: Generates code to compute the flattened array offset for // one dimension of an array reference: // result = (prevDimOffset * dimSize) + effectiveIndex // where dimSize is obtained from the arrObj operand // // Arguments: // arrOffset - the node for which we're generating code // // Return Value: // None. // // Notes: // dimSize and effectiveIndex are always non-negative, the former by design, // and the latter because it has been normalized to be zero-based. void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset) { GenTreePtr offsetNode = arrOffset->gtOffset; GenTreePtr indexNode = arrOffset->gtIndex; GenTreePtr arrObj = arrOffset->gtArrObj; regNumber tgtReg = arrOffset->gtRegNum; noway_assert(tgtReg != REG_NA); unsigned dim = arrOffset->gtCurrDim; unsigned rank = arrOffset->gtArrRank; var_types elemType = arrOffset->gtArrElemType; // We will use a temp register for the offset*scale+effectiveIndex computation. regMaskTP tmpRegMask = arrOffset->gtRsvdRegs; regNumber tmpReg = genRegNumFromMask(tmpRegMask); if (!offsetNode->IsZero()) { // Evaluate tgtReg = offsetReg*dim_size + indexReg. 
// tmpReg is used to load dim_size and the result of the multiplication. // Note that dim_size will never be negative. regNumber offsetReg = genConsumeReg(offsetNode); regNumber indexReg = genConsumeReg(indexNode); regNumber arrReg = genConsumeReg(arrObj); getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg, genOffsetOfMDArrayDimensionSize(elemType, rank, dim)); inst_RV_RV(INS_imul, tmpReg, offsetReg); if (tmpReg == tgtReg) { inst_RV_RV(INS_add, tmpReg, indexReg); } else { if (indexReg != tgtReg) { inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL); } inst_RV_RV(INS_add, tgtReg, tmpReg); } } else { regNumber indexReg = genConsumeReg(indexNode); if (indexReg != tgtReg) { inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT); } } genProduceReg(arrOffset); } // make a temporary indir we can feed to pattern matching routines // in cases where we don't want to instantiate all the indirs that happen // GenTreeIndir CodeGen::indirForm(var_types type, GenTree *base) { GenTreeIndir i(GT_IND, type, base, nullptr); i.gtRegNum = REG_NA; // has to be nonnull (because contained nodes can't be the last in block) // but don't want it to be a valid pointer i.gtNext = (GenTree *)(-1); return i; } // make a temporary int we can feed to pattern matching routines // in cases where we don't want to instantiate // GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value) { GenTreeIntCon i(type, value); i.gtRegNum = REG_NA; // has to be nonnull (because contained nodes can't be the last in block) // but don't want it to be a valid pointer i.gtNext = (GenTree *)(-1); return i; } instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins; // Operations on SIMD vectors shouldn't come this path assert(!varTypeIsSIMD(type)); if (varTypeIsFloating(type)) { return ins_MathOp(oper, type); } switch (oper) { case GT_ADD: ins = INS_add; break; case GT_AND: ins = INS_and; break; case GT_MUL: ins = INS_imul; break; case GT_LSH: ins = INS_shl; break; case GT_NEG: ins = INS_neg; break; case GT_NOT: ins = INS_not; break; case GT_OR: ins = INS_or; break; case GT_RSH: ins = INS_sar; break; case GT_RSZ: ins = INS_shr; break; case GT_SUB: ins = INS_sub; break; case GT_XOR: ins = INS_xor; break; default: unreached(); break; } return ins; } /** Generates the code sequence for a GenTree node that * represents a bit shift operation (<<, >>, >>>). * * Arguments: operand: the value to be shifted by shiftBy bits. * shiftBy: the number of bits to shift the operand. * parent: the actual bitshift node (that specifies the * type of bitshift to perform. * * Preconditions: a) All GenTrees are register allocated. * b) Either shiftBy is a contained constant or * it's an expression sitting in RCX. 
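 *
 * For example (non-RMW paths below, register names illustrative): "x << 3" with the result
 * allocated to a register other than x's comes out as
 *      mov targetReg, operandReg
 *      shl targetReg, 3
 * while a non-constant shift amount is first moved into RCX if it is not already there and
 * the shift is emitted as "shl targetReg, cl".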
*/ void CodeGen::genCodeForShift(GenTreePtr operand, GenTreePtr shiftBy, GenTreePtr parent) { var_types targetType = parent->TypeGet(); genTreeOps oper = parent->OperGet(); instruction ins = genGetInsForOper(oper, targetType); GenTreePtr actualOperand = operand->gtSkipReloadOrCopy(); bool isRMW = parent->gtOp.gtOp1->isContained(); assert(parent->gtRegNum != REG_NA || isRMW); regNumber operandReg = REG_NA; regNumber indexReg = REG_NA; int offset = 0; emitAttr attr = EA_UNKNOWN; bool isClsVarAddr = (operand->OperGet() == GT_CLS_VAR_ADDR);; if (!isRMW) { operandReg = genConsumeReg(operand); } else { targetType = parent->gtOp.gtOp1->TypeGet(); if (actualOperand->OperGet() == GT_LCL_VAR) { operandReg = operand->gtRegNum; } else if (actualOperand->OperGet() == GT_LEA) { operandReg = actualOperand->gtOp.gtOp1->gtRegNum; GenTreeAddrMode* addrMode = actualOperand->AsAddrMode(); offset = addrMode->gtOffset; if(addrMode->Index() != nullptr) { indexReg = addrMode->Index()->gtRegNum; // GT_LEA with an indexReg is not supported for shift by immediate assert(!shiftBy->isContainedIntOrIImmed()); } } else { // The only other supported operand for RMW is GT_CLS_VAR_ADDR assert(actualOperand->OperGet() == GT_CLS_VAR_ADDR); // We don't expect to see GT_COPY or GT_RELOAD for GT_CLS_VAR_ADDR // so 'actualOperand' should be the same as 'operand' assert(operand == actualOperand); } attr = EA_ATTR(genTypeSize(targetType)); } if (shiftBy->isContainedIntOrIImmed()) { int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue(); if (!isRMW) { // First, move the operand to the destination register and // later on perform the shift in-place. // (LSRA will try to avoid this situation through preferencing.) if (parent->gtRegNum != operandReg) { inst_RV_RV(INS_mov, parent->gtRegNum, operandReg, targetType); } inst_RV_SH(ins, emitTypeSize(parent), parent->gtRegNum, shiftByValue); } else { if (isClsVarAddr && shiftByValue == 1) { switch (ins) { case INS_sar: ins = INS_sar_1; break; case INS_shl: ins = INS_shl_1; break; case INS_shr: ins = INS_shr_1; break; } getEmitter()->emitIns_C(ins, attr, operand->gtClsVar.gtClsVarHnd, 0); } else { switch (ins) { case INS_sar: ins = INS_sar_N; break; case INS_shl: ins = INS_shl_N; break; case INS_shr: ins = INS_shr_N; break; } if (isClsVarAddr) { getEmitter()->emitIns_C_I(ins, attr, operand->gtClsVar.gtClsVarHnd, 0, shiftByValue); } else { getEmitter()->emitIns_I_AR(ins, attr, shiftByValue, operandReg, offset); } } } } else { // We must have the number of bits to shift // stored in ECX, since we constrained this node to // sit in ECX, in case this didn't happen, LSRA expects // the code generator to move it since it's a single // register destination requirement. 
regNumber shiftReg = genConsumeReg(shiftBy); if (shiftReg != REG_RCX) { // Issue the mov to RCX: inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet()); shiftReg = REG_RCX; } // The operand to be shifted must not be in ECX noway_assert(operandReg != REG_RCX); if (isRMW) { if (isClsVarAddr) { getEmitter()->emitIns_C_R(ins, attr, operand->gtClsVar.gtClsVarHnd, shiftReg, 0); } else { getEmitter()->emitIns_AR_R(ins, attr, indexReg, operandReg, offset); } } else { if (parent->gtRegNum != operandReg) { inst_RV_RV(INS_mov, parent->gtRegNum, operandReg, targetType); } inst_RV_CL(ins, parent->gtRegNum, targetType); } } genProduceReg(parent); } void CodeGen::genUnspillRegIfNeeded(GenTree *tree) { regNumber dstReg = tree->gtRegNum; GenTree* unspillTree = tree; if (tree->gtOper == GT_RELOAD) { unspillTree = tree->gtOp.gtOp1; } if (unspillTree->gtFlags & GTF_SPILLED) { if (genIsRegCandidateLocal(unspillTree)) { // Reset spilled flag, since we are going to load a local variable from its home location. unspillTree->gtFlags &= ~GTF_SPILLED; GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon(); LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; // Load local variable from its home location. inst_RV_TT(ins_Load(unspillTree->gtType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree); unspillTree->SetInReg(); // TODO-Review: We would like to call: // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree)); // instead of the following code, but this ends up hitting this assert: // assert((regSet.rsMaskVars & regMask) == 0); // due to issues with LSRA resolution moves. // So, just force it for now. This probably indicates a condition that creates a GC hole! // // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove, // because the variable is not really going live or dead, but that method is somewhat poorly // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo. // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp. // Don't update the variable's location if we are just re-spilling it again. if ((unspillTree->gtFlags & GTF_SPILL) == 0) { genUpdateVarReg(varDsc, tree); #ifdef DEBUG if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) { JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum); } #endif // DEBUG VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); #ifdef DEBUG if (compiler->verbose) { printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum); varDsc->PrintVarReg(); printf(" is becoming live "); compiler->printTreeID(unspillTree); printf("\n"); } #endif // DEBUG regSet.rsMaskVars |= genGetRegMask(varDsc); } } else { TempDsc* t = regSet.rsUnspillInPlace(unspillTree); getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg, t->tdTempNum(), 0); compiler->tmpRlsTemp(t); unspillTree->gtFlags &= ~GTF_SPILLED; unspillTree->SetInReg(); } gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet()); } } // Do Liveness update for a subnodes that is being consumed by codegen // including the logic for reload in case is needed and also takes care // of locating the value on the desired register. 
void CodeGen::genConsumeRegAndCopy(GenTree *tree, regNumber needReg) { regNumber treeReg = genConsumeReg(tree); if (treeReg != needReg) { inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet()); } } void CodeGen::genRegCopy(GenTree* treeNode) { assert(treeNode->OperGet() == GT_COPY); var_types targetType = treeNode->TypeGet(); regNumber targetReg = treeNode->gtRegNum; assert(targetReg != REG_NA); GenTree* op1 = treeNode->gtOp.gtOp1; // Check whether this node and the node from which we're copying the value have the same // register type. // This can happen if (currently iff) we have a SIMD vector type that fits in an integer // register, in which case it is passed as an argument, or returned from a call, // in an integer register and must be copied if it's in an xmm register. if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1)) { instruction ins; regNumber fpReg; regNumber intReg; if(varTypeIsFloating(treeNode)) { ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet()); fpReg = targetReg; intReg = op1->gtRegNum; } else { ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet()); intReg = targetReg; fpReg = op1->gtRegNum; } inst_RV_RV(ins, fpReg, intReg, targetType); } else { inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); } if (op1->IsLocal()) { // The lclVar will never be a def. // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will // appropriately set the gcInfo for the copied value. // If not, there are two cases we need to handle: // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable // will remain live in its original register. // genProduceReg() will appropriately set the gcInfo for the copied value, // and genConsumeReg will reset it. // - Otherwise, we need to update register info for the lclVar. GenTreeLclVarCommon* lcl = op1->AsLclVarCommon(); assert((lcl->gtFlags & GTF_VAR_DEF) == 0); if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0) { LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum]; // If we didn't just spill it (in genConsumeReg, above), then update the register info if (varDsc->lvRegNum != REG_STK) { // The old location is dying genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1)); gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum)); genUpdateVarReg(varDsc, treeNode); // The new location is going live genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode)); } } } genProduceReg(treeNode); } // Do liveness update for a subnode that is being consumed by codegen. regNumber CodeGen::genConsumeReg(GenTree *tree) { if (tree->OperGet() == GT_COPY) { genRegCopy(tree); } // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it // interferes with one of the other sources (or the target, if it's a "delayed use" register)). // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and // always using GT_COPY to make the lclVar location explicit. // Note that we have to do this before calling genUpdateLife because otherwise if we spill it // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds // the lclVar (normally when a lclVar is spilled it is then used from its former register // location, which matches the gtRegNum on the node). 
// (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded // because if it's on the stack it will always get reloaded into tree->gtRegNum). if (genIsRegCandidateLocal(tree)) { GenTreeLclVarCommon *lcl = tree->AsLclVarCommon(); LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum) { inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum); } } genUnspillRegIfNeeded(tree); // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar genUpdateLife(tree); assert(tree->gtRegNum != REG_NA); // there are three cases where consuming a reg means clearing the bit in the live mask // 1. it was not produced by a local // 2. it was produced by a local that is going dead // 3. it was produced by a local that does not live in that reg (like one allocated on the stack) if (genIsRegCandidateLocal(tree)) { GenTreeLclVarCommon *lcl = tree->AsLclVarCommon(); LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; assert(varDsc->lvLRACandidate); if ((tree->gtFlags & GTF_VAR_DEATH) != 0) { gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum)); } else if (varDsc->lvRegNum == REG_STK) { // We have loaded this into a register only temporarily gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); } } else { gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); } return tree->gtRegNum; } // Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect). void CodeGen::genConsumeAddress(GenTree* addr) { if (addr->OperGet() == GT_LEA) { genConsumeAddrMode(addr->AsAddrMode()); } else if (!addr->isContained()) { genConsumeReg(addr); } } // do liveness update for a subnode that is being consumed by codegen void CodeGen::genConsumeAddrMode(GenTreeAddrMode *addr) { if (addr->Base()) genConsumeReg(addr->Base()); if (addr->Index()) genConsumeReg(addr->Index()); } void CodeGen::genConsumeRegs(GenTree* tree) { #if !defined(_TARGET_64BIT_) if (tree->OperGet() == GT_LONG) { genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); return; } #endif // !defined(_TARGET_64BIT_) if (tree->isContained()) { if (tree->isIndir()) { genConsumeAddress(tree->AsIndir()->Addr()); } else if (tree->OperGet() == GT_AND) { // This is the special contained GT_AND that we created in Lowering::LowerCmp() // Now we need to consume the operands of the GT_AND node. genConsumeOperands(tree->AsOp()); } else { assert(tree->OperIsLeaf()); } } else { genConsumeReg(tree); } } //------------------------------------------------------------------------ // genConsumeOperands: Do liveness update for the operands of a unary or binary tree // // Arguments: // tree - the GenTreeOp whose operands will have their liveness updated. // // Return Value: // None. // // Notes: // Note that this logic is localized here because we must do the liveness update in // the correct execution order. This is important because we may have two operands // that involve the same lclVar, and if one is marked "lastUse" we must handle it // after the first. 
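//    For example, if GTF_REVERSE_OPS is set on "x + y", gtOp2 (y) is evaluated before
//    gtOp1 (x), so the operands below are consumed in that reversed order to keep the
//    liveness updates in execution order.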
void CodeGen::genConsumeOperands(GenTreeOp* tree) { GenTree* firstOp = tree->gtOp1; GenTree* secondOp = tree->gtOp2; if ((tree->gtFlags & GTF_REVERSE_OPS) != 0) { assert(secondOp != nullptr); firstOp = secondOp; secondOp = tree->gtOp1; } if (firstOp != nullptr) { genConsumeRegs(firstOp); } if (secondOp != nullptr) { genConsumeRegs(secondOp); } } // do liveness update for register produced by the current node in codegen void CodeGen::genProduceReg(GenTree *tree) { if (tree->gtFlags & GTF_SPILL) { if (genIsRegCandidateLocal(tree)) { // Store local variable to its home location. tree->gtFlags &= ~GTF_REG_VAL; inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree, tree->gtRegNum); } else { tree->SetInReg(); regSet.rsSpillTree(tree->gtRegNum, tree); tree->gtFlags |= GTF_SPILLED; tree->gtFlags &= ~GTF_SPILL; gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum)); return; } } genUpdateLife(tree); // If we've produced a register, mark it as a pointer, as needed. if (tree->gtHasReg()) { // We only mark the register in the following cases: // 1. It is not a register candidate local. In this case, we're producing a // register from a local, but the local is not a register candidate. Thus, // we must be loading it as a temp register, and any "last use" flag on // the register wouldn't be relevant. // 2. The register candidate local is going dead. There's no point to mark // the register as live, with a GC pointer, if the variable is dead. if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0)) { gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet()); } } tree->SetInReg(); } // transfer gc/byref status of src reg to dst reg void CodeGen::genTransferRegGCState(regNumber dst, regNumber src) { regMaskTP srcMask = genRegMask(src); regMaskTP dstMask = genRegMask(dst); if (gcInfo.gcRegGCrefSetCur & srcMask) { gcInfo.gcMarkRegSetGCref(dstMask); } else if (gcInfo.gcRegByrefSetCur & srcMask) { gcInfo.gcMarkRegSetByref(dstMask); } else { gcInfo.gcMarkRegSetNpt(dstMask); } } // generates an ip-relative call or indirect call via reg ('call reg') // pass in 'addr' for a relative call or 'base' for a indirect register call // methHnd - optional, only used for pretty printing // retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) void CodeGen::genEmitCall(int callType, CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize), emitAttr retSize, IL_OFFSETX ilOffset, regNumber base, bool isJump, bool isNoGC) { #ifndef _TARGET_X86_ ssize_t argSize = 0; #endif // !_TARGET_X86_ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump, emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd))); } // generates an indirect call via addressing mode (call []) given an indir node // methHnd - optional, only used for pretty printing // retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC) void CodeGen::genEmitCall(int callType, CORINFO_METHOD_HANDLE methHnd, INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize), emitAttr retSize, IL_OFFSETX ilOffset) { #ifndef _TARGET_X86_ ssize_t argSize = 0; #endif // !_TARGET_X86_ genConsumeAddress(indir->Addr()); 
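    // The emitter call below carries the full [base + index*scale + offset] form of the
    // address, so the encoded instruction ends up as, for example, "call [rax+rcx*8+0x20]";
    // REG_NA is passed for a missing base or index component.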
getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, indir->Base() ? indir->Base()->gtRegNum : REG_NA, indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset()); } // Produce code for a GT_CALL node void CodeGen::genCallInstruction(GenTreePtr node) { GenTreeCall *call = node->AsCall(); assert(call->gtOper == GT_CALL); gtCallTypes callType = (gtCallTypes)call->gtCallType; IL_OFFSETX ilOffset = BAD_IL_OFFSET; // all virtuals should have been expanded into a control expression assert (!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr); // Consume all the arg regs for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext()) { assert(list->IsList()); GenTreePtr argNode = list->Current(); fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy()); assert(curArgTabEntry); if (curArgTabEntry->regNum == REG_STK) continue; regNumber argReg = curArgTabEntry->regNum; genConsumeReg(argNode); if (argNode->gtRegNum != argReg) { inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum); } // In the case of a varargs call, // the ABI dictates that if we have floating point args, // we must pass the enregistered arguments in both the // integer and floating point registers so, let's do that. if (call->IsVarargs() && varTypeIsFloating(argNode)) { regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum); instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG); inst_RV_RV(ins, argNode->gtRegNum, targetReg); } } #ifdef _TARGET_X86_ // The call will pop its arguments. // for each putarg_stk: ssize_t stackArgBytes = 0; GenTreePtr args = call->gtCallArgs; while (args) { GenTreePtr arg = args->gtOp.gtOp1; if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG)) { assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG)); if (arg->OperGet() == GT_LONG) { assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK)); } stackArgBytes += genTypeSize(genActualType(arg->TypeGet())); } args = args->gtOp.gtOp2; } #endif // _TARGET_X86_ // Insert a null check on "this" pointer if asked. if (call->NeedsNullCheck()) { const regNumber regThis = genGetThisArgReg(call); getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0); } // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method. CORINFO_METHOD_HANDLE methHnd; GenTree* target = call->gtControlExpr; if (callType == CT_INDIRECT) { assert(target == nullptr); target = call->gtCall.gtCallAddr; methHnd = nullptr; } else { methHnd = call->gtCallMethHnd; } CORINFO_SIG_INFO* sigInfo = nullptr; #ifdef DEBUG // Pass the call signature information down into the emitter so the emitter can associate // native call sites with the signatures they were generated from. if (callType != CT_HELPER) { sigInfo = call->callSig; } #endif // DEBUG // If fast tail call, then we are done. In this case we setup the args (both reg args // and stack args in incoming arg area) and call target in rax. Epilog sequence would // generate "jmp rax". if (call->IsFastTailCall()) { // Don't support fast tail calling JIT helpers assert(callType != CT_HELPER); // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr. 
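        // In either case the target has been evaluated into a register by now; it is moved
        // into RAX below, and the epilog performs the actual transfer with a "jmp rax".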
assert(target != nullptr); genConsumeReg(target); if (target->gtRegNum != REG_RAX) { inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum); } return ; } // For a pinvoke to unmanged code we emit a label to clear // the GC pointer state before the callsite. // We can't utilize the typical lazy killing of GC pointers // at (or inside) the callsite. if (call->IsUnmanaged()) { genDefineTempLabel(genCreateTempLabel()); } // Determine return value size. emitAttr retSize = EA_PTRSIZE; if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY) { retSize = EA_GCREF; } else if (call->gtType == TYP_BYREF) { retSize = EA_BYREF; } #ifdef DEBUGGING_SUPPORT // We need to propagate the IL offset information to the call instruction, so we can emit // an IL to native mapping record for the call, to support managed return value debugging. // We don't want tail call helper calls that were converted from normal calls to get a record, // so we skip this hash table lookup logic in that case. if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall()) { (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset); } #endif // DEBUGGING_SUPPORT if (target != nullptr) { if (target->isContainedIndir()) { if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed()) { // Note that if gtControlExpr is an indir of an absolute address, we mark it as // contained only if it can be encoded as PC-relative offset. assert(genAddrShouldUsePCRel(target->AsIndir()->Base()->AsIntConCommon()->IconValue())); genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) (void*) target->AsIndir()->Base()->AsIntConCommon()->IconValue(), #ifdef _TARGET_X86_ stackArgBytes, #endif // _TARGET_X86_ retSize, ilOffset); } else { GenTree* addr = target->gtGetOp1(); genConsumeAddress(addr); genEmitCall(emitter::EC_INDIR_ARD, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir(), #ifdef _TARGET_X86_ stackArgBytes, #endif // _TARGET_X86_ retSize, ilOffset); } } else { // We have already generated code for gtControlExpr evaluating it into a register. // We just need to emit "call reg" in this case. assert(genIsValidIntReg(target->gtRegNum)); genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, //addr #ifdef _TARGET_X86_ stackArgBytes, #endif // _TARGET_X86_ retSize, ilOffset, genConsumeReg(target)); } } #if defined(_TARGET_AMD64_) && defined(FEATURE_READYTORUN_COMPILER) else if (call->gtEntryPoint.addr != nullptr) { genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) (void*) call->gtEntryPoint.addr, retSize, ilOffset); } #endif else { // Generate a direct call to a non-virtual user defined or helper method assert(callType == CT_HELPER || callType == CT_USER_FUNC); void *addr = nullptr; if (callType == CT_HELPER) { // Direct call to a helper method. CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd); noway_assert(helperNum != CORINFO_HELP_UNDEF); void *pAddr = nullptr; addr = compiler->compGetHelperFtn(helperNum, (void **)&pAddr); if (addr == nullptr) { addr = pAddr; } } else { // Direct call to a non-virtual user function. 
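            // Ask the EE for the method's entry point. The access flags computed below let
            // the EE skip work we have already proven unnecessary (same 'this' object, or a
            // 'this' pointer known to be non-null).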
CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY; if (call->IsSameThis()) { aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS); } if ((call->NeedsNullCheck()) == 0) { aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL); } CORINFO_CONST_LOOKUP addrInfo; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags); addr = addrInfo.addr; } // Non-virtual direct calls to known addresses genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, #ifdef _TARGET_X86_ stackArgBytes, #endif // _TARGET_X86_ retSize, ilOffset); } // if it was a pinvoke we may have needed to get the address of a label if (genPendingCallLabel) { assert(call->IsUnmanaged()); genDefineTempLabel(genPendingCallLabel); genPendingCallLabel = nullptr; } #ifdef _TARGET_X86_ // The call will pop its arguments. genStackLevel -= stackArgBytes; #endif // _TARGET_X86_ // Update GC info: // All Callee arg registers are trashed and no longer contain any GC pointers. // TODO-XArch-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here? // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other // registers from RBM_CALLEE_TRASH. assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0); gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS; gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS; var_types returnType = call->TypeGet(); if (returnType != TYP_VOID) { #ifdef _TARGET_X86_ if (varTypeIsFloating(returnType)) { // Spill the value from the fp stack. // Then, load it into the target register. call->gtFlags |= GTF_SPILL; regSet.rsSpillFPStack(call); call->gtFlags |= GTF_SPILLED; call->gtFlags &= ~GTF_SPILL; genUnspillRegIfNeeded(call); } else #endif // _TARGET_X86_ { regNumber returnReg = (varTypeIsFloating(returnType) ? REG_FLOATRET : REG_INTRET); if (call->gtRegNum != returnReg) { inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType); } genProduceReg(call); } } // If there is nothing next, that means the result is thrown away, so this value is not live. // However, for minopts or debuggable code, we keep it live to support managed return value debugging. if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode) { gcInfo.gcMarkRegSetNpt(RBM_INTRET); } } // Produce code for a GT_JMP node. // The arguments of the caller needs to be transferred to the callee before exiting caller. // The actual jump to callee is generated as part of caller epilog sequence. // Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup. void CodeGen::genJmpMethod(GenTreePtr jmp) { assert(jmp->OperGet() == GT_JMP); assert(compiler->compJmpOpUsed); // If no arguments, nothing to do if (compiler->info.compArgsCount == 0) { return; } // Make sure register arguments are in their initial registers // and stack arguments are put back as well. unsigned varNum; LclVarDsc* varDsc; // First move any en-registered stack arguments back to the stack. // At the same time any reg arg not in correct reg is moved back to its stack location. // // We are not strictly required to spill reg args that are not in the desired reg for a jmp call // But that would require us to deal with circularity while moving values around. Spilling // to stack makes the implementation simple, which is not a bad trade off given Jmp calls // are not frequent. 
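    // First pass (below): any argument that is not already where the callee expects it is
    // stored back to its stack home, e.g. an illustrative "mov qword ptr [frame slot], argReg";
    // the second pass then reloads register arguments into their incoming argument registers.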
for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) { varDsc = compiler->lvaTable + varNum; if (varDsc->lvPromoted) { noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here unsigned fieldVarNum = varDsc->lvFieldLclStart; varDsc = compiler->lvaTable + fieldVarNum; } noway_assert(varDsc->lvIsParam); if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK)) { // Skip reg args which are already in its right register for jmp call. // If not, we will spill such args to their stack locations. // // If we need to generate a tail call profiler hook, then spill all // arg regs to free them up for the callback. if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg)) continue; } else if (varDsc->lvRegNum == REG_STK) { // Skip args which are currently living in stack. continue; } // If we came here it means either a reg argument not in the right register or // a stack argument currently living in a register. In either case the following // assert should hold. assert(varDsc->lvRegNum != REG_STK); var_types loadType = varDsc->lvaArgType(); getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0); // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live. // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. // Therefore manually update life of varDsc->lvRegNum. regMaskTP tempMask = genRegMask(varDsc->lvRegNum); regSet.rsMaskVars &= ~tempMask; gcInfo.gcMarkRegSetNpt(tempMask); if (varDsc->lvTracked) { VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum); } } #ifdef PROFILING_SUPPORTED // At this point all arg regs are free. // Emit tail call profiler callback. genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL); #endif // Next move any un-enregistered register arguments back to their register. regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method. unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method. for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++) { varDsc = compiler->lvaTable + varNum; if (varDsc->lvPromoted) { noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here unsigned fieldVarNum = varDsc->lvFieldLclStart; varDsc = compiler->lvaTable + fieldVarNum; } noway_assert(varDsc->lvIsParam); // Skip if arg not passed in a register. if (!varDsc->lvIsRegArg) continue; // Register argument noway_assert(isRegParamType(genActualType(varDsc->TypeGet()))); // Is register argument already in the right register? // If not load it from its stack location. var_types loadType = varDsc->lvaArgType(); regNumber argReg = varDsc->lvArgReg; // incoming arg register if (varDsc->lvRegNum != argReg) { assert(genIsValidReg(argReg)); getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0); // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live. // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it. // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList(). 
            regSet.rsMaskVars |= genRegMask(argReg);
            gcInfo.gcMarkRegPtrVal(argReg, loadType);
            if (varDsc->lvTracked)
            {
                VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
            }
        }

        // In case of a jmp call to a vararg method, also pass the float/double arg in the
        // corresponding int arg register.
        if (compiler->info.compIsVarArgs)
        {
            regNumber intArgReg;
            if (varTypeIsFloating(loadType))
            {
                intArgReg = compiler->getCallArgIntRegister(argReg);
                instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
                inst_RV_RV(ins, argReg, intArgReg, loadType);
            }
            else
            {
                intArgReg = argReg;
            }

            fixedIntArgMask |= genRegMask(intArgReg);

            if (intArgReg == REG_ARG_0)
            {
                assert(firstArgVarNum == BAD_VAR_NUM);
                firstArgVarNum = varNum;
            }
        }
    }

    // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
    // load the remaining arg registers (both int and float) from the corresponding
    // shadow stack slots. This is because we don't know the number and type of
    // non-fixed params passed by the caller; therefore we have to assume the worst case
    // of the caller passing float/double args both in int and float arg regs.
    //
    // The caller could have passed gc-ref/byref type var args. Since these are var args,
    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
    // the remaining arg registers from shadow stack slots as non-gc interruptible.
    if (fixedIntArgMask != RBM_NONE)
    {
        assert(compiler->info.compIsVarArgs);
        assert(firstArgVarNum != BAD_VAR_NUM);

        regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
        if (remainingIntArgMask != RBM_NONE)
        {
            instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
            getEmitter()->emitDisableGC();
            for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
            {
                regNumber argReg     = intArgRegs[argNum];
                regMaskTP argRegMask = genRegMask(argReg);

                if ((remainingIntArgMask & argRegMask) != 0)
                {
                    remainingIntArgMask &= ~argRegMask;
                    getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);

                    // also load it in the corresponding float arg reg
                    regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
                    inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
                }

                argOffset += REGSIZE_BYTES;
            }
            getEmitter()->emitEnableGC();
        }
    }
}

// produce code for a GT_LEA subnode
void CodeGen::genLeaInstruction(GenTreeAddrMode *lea)
{
    emitAttr size = emitTypeSize(lea);
    genConsumeOperands(lea);

    if (lea->Base() && lea->Index())
    {
        regNumber baseReg  = lea->Base()->gtRegNum;
        regNumber indexReg = lea->Index()->gtRegNum;
        getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
    }
    else if (lea->Base())
    {
        getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
    }
    else if (lea->Index())
    {
        getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale, lea->gtOffset);
    }

    genProduceReg(lea);
}

/*****************************************************************************
 *  The conditions to use for (the jmp/set for) the given type of compare operation are
 *  returned in the 'jmpKind' array. The corresponding elements of jmpToTrueLabel indicate
 *  whether to branch to the target when the condition is true.
 *
 *  jmpToTrueLabel[i] = true  implies branch to the target when the compare operation is true.
 *  jmpToTrueLabel[i] = false implies branch to the target when the compare operation is false.
 */
// static
void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
{
    // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
    jmpToTrueLabel[0] = true;
    jmpToTrueLabel[1] = true;

    // For integer comparisons just use genJumpKindForOper
    if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
    {
        jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, (cmpTree->gtFlags & GTF_UNSIGNED) != 0);
        jmpKind[1] = EJ_NONE;
    }
    else
    {
        assert(cmpTree->OperIsCompare());

        // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
        // while generating code for compare operators (e.g. GT_EQ etc).
        if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
        {
            // Unordered
            switch (cmpTree->gtOper)
            {
            case GT_LT:
            case GT_GT:
                jmpKind[0] = EJ_jb;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_LE:
            case GT_GE:
                jmpKind[0] = EJ_jbe;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_NE:
                jmpKind[0] = EJ_jpe;
                jmpKind[1] = EJ_jne;
                break;

            case GT_EQ:
                jmpKind[0] = EJ_je;
                jmpKind[1] = EJ_NONE;
                break;

            default:
                unreached();
            }
        }
        else
        {
            switch (cmpTree->gtOper)
            {
            case GT_LT:
            case GT_GT:
                jmpKind[0] = EJ_ja;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_LE:
            case GT_GE:
                jmpKind[0] = EJ_jae;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_NE:
                jmpKind[0] = EJ_jne;
                jmpKind[1] = EJ_NONE;
                break;

            case GT_EQ:
                jmpKind[0] = EJ_jpe;
                jmpKind[1] = EJ_je;
                jmpToTrueLabel[0] = false;
                break;

            default:
                unreached();
            }
        }
    }
}

// Generate code to materialize a condition into a register
// (the condition codes must already have been appropriately set)
void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
{
    noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);

    emitJumpKind jumpKind[2];
    bool branchToTrueLabel[2];
    genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);

    if (jumpKind[1] == EJ_NONE)
    {
        // Set (lower byte of) reg according to the flags
        inst_SET(jumpKind[0], dstReg);
    }
    else
    {
        // jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
        // These are represented by two conditions.

#ifdef DEBUG
        if (tree->gtOper == GT_EQ)
        {
            // This must be an ordered comparison.
            assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
        }
        else
        {
            // This must be BNE.UN
            assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
        }
#endif

        // Here is the sample code generated in each case:
        // BEQ == cmp, jpe, je
        // That is, to materialize the comparison, reg needs to be set if PF=0 and ZF=1
        //     setnp reg   // if (PF==0) reg = 1 else reg = 0
        //     jpe L1      // Jmp if PF==1
        //     sete reg
        // L1:
        //
        // BNE.UN == cmp, jpe, jne
        // That is, to materialize the comparison, reg needs to be set if either PF=1 or ZF=0;
        //     setp reg
        //     jpe L1
        //     setne reg
        // L1:

        // reverse the jmpkind condition before setting dstReg if it is to the false label.
        inst_SET(branchToTrueLabel[0] ?
jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg); BasicBlock* label = genCreateTempLabel(); inst_JMP(jumpKind[0], label); // second branch is always to true label assert(branchToTrueLabel[1]); inst_SET(jumpKind[1], dstReg); genDefineTempLabel(label); } var_types treeType = tree->TypeGet(); if (treeType == TYP_INT || treeType == TYP_LONG) { // Set the higher bytes to 0 inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE)); } else { // FP types have been converted to flow noway_assert(treeType == TYP_BYTE); } } //------------------------------------------------------------------------ // genIntToIntCast: Generate code for an integer cast // This method handles integer overflow checking casts // as well as ordinary integer casts. // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // The treeNode is not a contained node and must have an assigned register. // For a signed convert from byte, the source must be in a byte-addressable register. // Neither the source nor target type can be a floating point type. // // TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register. // void CodeGen::genIntToIntCast(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CAST); GenTreePtr castOp = treeNode->gtCast.CastOp(); regNumber targetReg = treeNode->gtRegNum; regNumber sourceReg = castOp->gtRegNum; var_types dstType = treeNode->CastToType(); bool isUnsignedDst = varTypeIsUnsigned(dstType); var_types srcType = genActualType(castOp->TypeGet()); bool isUnsignedSrc = varTypeIsUnsigned(srcType); // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0) { srcType = genUnsignedType(srcType); isUnsignedSrc = true; } bool requiresOverflowCheck = false; bool needAndAfter = false; assert(genIsValidIntReg(targetReg)); assert(genIsValidIntReg(sourceReg)); instruction ins = INS_invalid; emitAttr size = EA_UNKNOWN; if (genTypeSize(srcType) < genTypeSize(dstType)) { // Widening cast // Is this an Overflow checking cast? // We only need to handle one case, as the other casts can never overflow. // cast from TYP_INT to TYP_ULONG // if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG)) { requiresOverflowCheck = true; size = EA_ATTR(genTypeSize(srcType)); ins = INS_mov; } else { // we need the source size size = EA_ATTR(genTypeSize(srcType)); noway_assert(size < EA_PTRSIZE); ins = ins_Move_Extend(srcType, castOp->InReg()); /* Special case: ins_Move_Extend assumes the destination type is no bigger than TYP_INT. movsx and movzx can already extend all the way to 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existant movzxd), but for a sign extension from TYP_INT to TYP_LONG, we need to use movsxd opcode. */ if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE)) { #ifdef _TARGET_X86_ NYI_X86("Cast to 64 bit for x86/RyuJIT"); #else // !_TARGET_X86_ ins = INS_movsxd; #endif // !_TARGET_X86_ } /* Special case: for a cast of byte to char we first have to expand the byte (w/ sign extension), then mask off the high bits. 
Use 'movsx' followed by 'and' */ if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE)) { noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE); needAndAfter = true; } } } else { // Narrowing cast, or sign-changing cast noway_assert(genTypeSize(srcType) >= genTypeSize(dstType)); // Is this an Overflow checking cast? if (treeNode->gtOverflow()) { requiresOverflowCheck = true; size = EA_ATTR(genTypeSize(srcType)); ins = INS_mov; } else { size = EA_ATTR(genTypeSize(dstType)); ins = ins_Move_Extend(dstType, castOp->InReg()); } } noway_assert(ins != INS_invalid); genConsumeReg(castOp); if (requiresOverflowCheck) { ssize_t typeMin = 0; ssize_t typeMax = 0; ssize_t typeMask = 0; bool needScratchReg = false; bool signCheckOnly = false; /* Do we need to compare the value, or just check masks */ switch (dstType) { case TYP_BYTE: typeMask = ssize_t((int)0xFFFFFF80); typeMin = SCHAR_MIN; typeMax = SCHAR_MAX; break; case TYP_UBYTE: typeMask = ssize_t((int)0xFFFFFF00L); break; case TYP_SHORT: typeMask = ssize_t((int)0xFFFF8000); typeMin = SHRT_MIN; typeMax = SHRT_MAX; break; case TYP_CHAR: typeMask = ssize_t((int)0xFFFF0000L); break; case TYP_INT: if (srcType == TYP_UINT) { signCheckOnly = true; } else { typeMask = 0xFFFFFFFF80000000LL; typeMin = INT_MIN; typeMax = INT_MAX; } break; case TYP_UINT: if (srcType == TYP_INT) { signCheckOnly = true; } else { needScratchReg = true; } break; case TYP_LONG: noway_assert(srcType == TYP_ULONG); signCheckOnly = true; break; case TYP_ULONG: noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT)); signCheckOnly = true; break; default: NO_WAY("Unknown type"); return; } if (signCheckOnly) { // We only need to check for a negative value in sourceReg inst_RV_IV(INS_cmp, sourceReg, 0, size); genJumpToThrowHlpBlk(EJ_jl, Compiler::ACK_OVERFLOW); if (dstType == TYP_ULONG) { // cast from TYP_INT to TYP_ULONG // The upper bits on sourceReg will already be zero by definition (x64) srcType = TYP_ULONG; size = EA_8BYTE; } } else { regNumber tmpReg = REG_NA; if (needScratchReg) { // We need an additional temp register // Make sure we have exactly one allocated. assert(treeNode->gtRsvdRegs != RBM_NONE); assert(genCountBits(treeNode->gtRsvdRegs) == 1); tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); } // When we are converting from unsigned or to unsigned, we // will only have to check for any bits set using 'typeMask' if (isUnsignedSrc || isUnsignedDst) { if (needScratchReg) { inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits genJumpToThrowHlpBlk(EJ_jne, Compiler::ACK_OVERFLOW); // Thow if result shift is non-zero } else { noway_assert(typeMask != 0); inst_RV_IV(INS_TEST, sourceReg, typeMask, size); genJumpToThrowHlpBlk(EJ_jne, Compiler::ACK_OVERFLOW); } } else { // For a narrowing signed cast // // We must check the value is in a signed range. // Compare with the MAX noway_assert((typeMin != 0) && (typeMax != 0)); inst_RV_IV(INS_cmp, sourceReg, typeMax, size); genJumpToThrowHlpBlk(EJ_jg, Compiler::ACK_OVERFLOW); // Compare with the MIN inst_RV_IV(INS_cmp, sourceReg, typeMin, size); genJumpToThrowHlpBlk(EJ_jl, Compiler::ACK_OVERFLOW); } } if (targetReg != sourceReg) inst_RV_RV(ins, targetReg, sourceReg, srcType, size); } else // non-overflow checking cast { noway_assert(size < EA_PTRSIZE || srcType == dstType); // We may have code transformations that result in casts where srcType is the same as dstType. // e.g. 
Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a // long constant to a long lclVar. if (srcType == dstType) { ins = INS_mov; } /* Is the value sitting in a non-byte-addressable register? */ else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg)) { if (isUnsignedDst) { // for unsigned values we can AND, so it need not be a byte register ins = INS_AND; } else { // Move the value into a byte register noway_assert(!"Signed byte convert from non-byte-addressable register"); } /* Generate "mov targetReg, castOp->gtReg */ if (targetReg != sourceReg) { inst_RV_RV(INS_mov, targetReg, sourceReg, srcType); } } else if (treeNode->gtSetFlags() && isUnsignedDst && castOp->InReg() && (targetReg == sourceReg)) { // if we (might) need to set the flags and the value is in the same register // and we have an unsigned value then use AND instead of MOVZX noway_assert(ins == INS_movzx || ins == INS_mov); ins = INS_AND; } if (ins == INS_AND) { noway_assert((needAndAfter == false) && isUnsignedDst); /* Generate "and reg, MASK */ insFlags flags = treeNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; unsigned fillPattern; if (size == EA_1BYTE) fillPattern = 0xff; else if (size == EA_2BYTE) fillPattern = 0xffff; else fillPattern = 0xffffffff; inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE, flags); } #ifdef _TARGET_AMD64_ else if (ins == INS_movsxd) { noway_assert(!needAndAfter); inst_RV_RV(ins, targetReg, sourceReg, srcType, size); } #endif // _TARGET_AMD64_ else if (ins == INS_mov) { noway_assert(!needAndAfter); if (targetReg != sourceReg) { inst_RV_RV(ins, targetReg, sourceReg, srcType, size); } } else { noway_assert(ins == INS_movsx || ins == INS_movzx); /* Generate "mov targetReg, castOp->gtReg */ inst_RV_RV(ins, targetReg, sourceReg, srcType, size); /* Mask off high bits for cast from byte to char */ if (needAndAfter) { noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx); insFlags flags = treeNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE, flags); } } } genProduceReg(treeNode); } //------------------------------------------------------------------------ // genFloatToFloatCast: Generate code for a cast between float and double // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // The cast is between float and double or vice versa. // void CodeGen::genFloatToFloatCast(GenTreePtr treeNode) { // float <--> double conversions are always non-overflow ones assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidFloatReg(targetReg)); GenTreePtr op1 = treeNode->gtOp.gtOp1; #ifdef DEBUG // If not contained, must be a valid float reg. if (!op1->isContained()) { assert(genIsValidFloatReg(op1->gtRegNum)); } #endif var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); genConsumeOperands(treeNode->AsOp()); if (srcType == dstType && targetReg == op1->gtRegNum) { // source and destinations types are the same and also reside in the same register. // we just need to consume and produce the reg in this case. 
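        // (No instruction is needed here; otherwise ins_FloatConv below typically selects
        // cvtss2sd for a float->double conversion or cvtsd2ss for double->float.)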
; } else { instruction ins = ins_FloatConv(dstType, srcType); getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); } genProduceReg(treeNode); } //------------------------------------------------------------------------ // genIntToFloatCast: Generate code to cast an int/long to float/double // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // SrcType= int32/uint32/int64/uint64 and DstType=float/double. // void CodeGen::genIntToFloatCast(GenTreePtr treeNode) { // int type --> float/double conversions are always non-overflow ones assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidFloatReg(targetReg)); GenTreePtr op1 = treeNode->gtOp.gtOp1; #ifdef DEBUG if (!op1->isContained()) { assert(genIsValidIntReg(op1->gtRegNum)); } #endif var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType)); #if !defined(_TARGET_64BIT_) NYI_IF(varTypeIsLong(srcType), "Conversion from long to float"); #endif // !defined(_TARGET_64BIT_) // force the srcType to unsigned if GT_UNSIGNED flag is set if (treeNode->gtFlags & GTF_UNSIGNED) { srcType = genUnsignedType(srcType); } // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long). // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect // either the front-end or lowering phase to have generated two levels of cast. // The first one is for widening smaller int type to int32 and the second one is // to the float/double. emitAttr srcSize = EA_ATTR(genTypeSize(srcType)); noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG)))); // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions // here since they should have been lowered apropriately. noway_assert(srcType != TYP_UINT); noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); // Note that here we need to specify srcType that will determine // the size of source reg/mem operand and rex.w prefix. genConsumeOperands(treeNode->AsOp()); instruction ins = ins_FloatConv(dstType, TYP_INT); getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction // will interpret ULONG value as LONG. Hence we need to adjust the // result if sign-bit of srcType is set. if (srcType == TYP_ULONG) { assert(dstType == TYP_DOUBLE); assert(!op1->isContained()); // Set the flags without modifying op1. // test op1Reg, op1Reg inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType); // No need to adjust result if op1 >= 0 i.e. 
positive // Jge label BasicBlock* label = genCreateTempLabel(); inst_JMP(EJ_jge, label); // Adjust the result // result = result + 0x43f00000 00000000 // addsd resultReg, 0x43f00000 00000000 GenTreePtr *cns = &u8ToDblBitmask; if (*cns == nullptr) { double d; static_assert_no_msg(sizeof(double) == sizeof(__int64)); *((__int64 *)&d) = 0x43f0000000000000LL; *cns = genMakeConst(&d, dstType, treeNode, true); } inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns); genDefineTempLabel(label); } genProduceReg(treeNode); } //------------------------------------------------------------------------ // genFloatToIntCast: Generate code to cast float/double to int/long // // Arguments: // treeNode - The GT_CAST node // // Return Value: // None. // // Assumptions: // Cast is a non-overflow conversion. // The treeNode must have an assigned register. // SrcType=float/double and DstType= int32/uint32/int64/uint64 // // TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64 // void CodeGen::genFloatToIntCast(GenTreePtr treeNode) { // we don't expect to see overflow detecting float/double --> int type conversions here // as they should have been converted into helper calls by front-end. assert(treeNode->OperGet() == GT_CAST); assert(!treeNode->gtOverflow()); regNumber targetReg = treeNode->gtRegNum; assert(genIsValidIntReg(targetReg)); GenTreePtr op1 = treeNode->gtOp.gtOp1; #ifdef DEBUG if (!op1->isContained()) { assert(genIsValidFloatReg(op1->gtRegNum)); } #endif var_types dstType = treeNode->CastToType(); var_types srcType = op1->TypeGet(); assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType)); // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG). // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the // front-end or lowering phase to have generated two levels of cast. The first one is // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to // the required smaller int type. emitAttr dstSize = EA_ATTR(genTypeSize(dstType)); noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted // into a helper call by either front-end or lowering phase. noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. // To acheive it we pretend as if we are converting it to a long. if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT)))) { dstType = TYP_LONG; } // Note that we need to specify dstType here so that it will determine // the size of destination integer register and also the rex.w prefix. genConsumeOperands(treeNode->AsOp()); instruction ins = ins_FloatConv(TYP_INT, srcType); getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); genProduceReg(treeNode); } //------------------------------------------------------------------------ // genCkfinite: Generate code for ckfinite opcode. // // Arguments: // treeNode - The GT_CKFINITE node // // Return Value: // None. // // Assumptions: // GT_CKFINITE node has reserved an internal register. // // TODO-XArch-CQ - mark the operand as contained if known to be in // memory (e.g. field or an array element). 
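//
// Notes:
//    A rough sketch of what gets emitted for a TYP_DOUBLE operand:
//        movd  tmpReg, xmmOp        ; raw bits into the reserved integer register (64-bit form)
//        shr   tmpReg, 32           ; bring sign/exponent into the low 32 bits
//        and   tmpReg, 0x7FF00000   ; isolate the exponent field
//        cmp   tmpReg, 0x7FF00000
//        je    <throw ArithmeticException>   ; an all-ones exponent means NaN or +/- infinity
//        movaps targetReg, xmmOp    ; only if the value is not already in the target register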
// void CodeGen::genCkfinite(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_CKFINITE); GenTreePtr op1 = treeNode->gtOp.gtOp1; var_types targetType = treeNode->TypeGet(); int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent. // Extract exponent into a register. assert(treeNode->gtRsvdRegs != RBM_NONE); assert(genCountBits(treeNode->gtRsvdRegs) == 1); regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG); inst_RV_RV(ins, genConsumeReg(op1), tmpReg, targetType); if (targetType == TYP_DOUBLE) { // right shift by 32 bits to get to exponent. inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32); } // Mask of exponent with all 1's and check if the exponent is all 1's inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE); // If exponent is all 1's, throw ArithmeticException genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_ARITH_EXCPN); // if it is a finite value copy it to targetReg if (treeNode->gtRegNum != op1->gtRegNum) { inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType); } genProduceReg(treeNode); } #ifdef _TARGET_AMD64_ int CodeGenInterface::genSPtoFPdelta() { int delta; // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if // RBP needs to be reported in unwind codes. This case would arise for methods // with localloc. if (compiler->compLocallocUsed) { // We cannot base delta computation on compLclFrameSize since it changes from // tentative to final frame layout and hence there is a possibility of // under-estimating offset of vars from FP, which in turn results in under- // estimating instruction size. // // To be predictive and so as never to under-estimate offset of vars from FP // we will always position FP at min(240, outgoing arg area size). delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize); } else if (compiler->opts.compDbgEnC) { // vm assumption on EnC methods is that rsp and rbp are equal delta = 0; } else { delta = genTotalFrameSize(); } return delta; } //--------------------------------------------------------------------- // genTotalFrameSize - return the total size of the stack frame, including local size, // callee-saved register size, etc. For AMD64, this does not include the caller-pushed // return address. // // Return value: // Total frame size // int CodeGenInterface::genTotalFrameSize() { assert(!IsUninitialized(compiler->compCalleeRegsPushed)); int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize; assert(totalFrameSize >= 0); return totalFrameSize; } //--------------------------------------------------------------------- // genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer. // This number is going to be negative, since the Caller-SP is at a higher // address than the frame pointer. // // There must be a frame pointer to call this function! // // We can't compute this directly from the Caller-SP, since the frame pointer // is based on a maximum delta from Initial-SP, so first we find SP, then // compute the FP offset. 
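//
// In other words:
//     CallerSP-to-FP = CallerSP-to-InitialSP + InitialSP-to-FP
// where CallerSP-to-InitialSP is -(genTotalFrameSize() + return address slot [+ saved RBP]),
// so the result is always <= 0.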
int CodeGenInterface::genCallerSPtoFPdelta() { assert(isFramePointerUsed()); int callerSPtoFPdelta; callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta(); assert(callerSPtoFPdelta <= 0); return callerSPtoFPdelta; } //--------------------------------------------------------------------- // genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP. // // This number will be negative. int CodeGenInterface::genCallerSPtoInitialSPdelta() { int callerSPtoSPdelta = 0; callerSPtoSPdelta -= genTotalFrameSize(); callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address // compCalleeRegsPushed does not account for the frame pointer // TODO-Cleanup: shouldn't this be part of genTotalFrameSize? if (isFramePointerUsed()) { callerSPtoSPdelta -= REGSIZE_BYTES; } assert(callerSPtoSPdelta <= 0); return callerSPtoSPdelta; } #endif // _TARGET_AMD64_ //----------------------------------------------------------------------------------------- // genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask" // // Arguments: // treeNode - tree node // // Return value: // None // // Assumptions: // i) tree oper is one of GT_NEG or GT_MATH Abs() // ii) tree type is floating point type. // iii) caller of this routine needs to call genProduceReg() void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode) { regNumber targetReg = treeNode->gtRegNum; var_types targetType = treeNode->TypeGet(); assert(varTypeIsFloating(targetType)); float f; double d; GenTreePtr *bitMask = nullptr; instruction ins = INS_invalid; void *cnsAddr = nullptr; bool dblAlign = false; switch(treeNode->OperGet()) { case GT_NEG: // Neg(x) = flip the sign bit. // Neg(f) = f ^ 0x80000000 // Neg(d) = d ^ 0x8000000000000000 ins = genGetInsForOper(GT_XOR, targetType); if (targetType == TYP_FLOAT) { bitMask = &negBitmaskFlt; static_assert_no_msg(sizeof(float) == sizeof(int)); *((int *)&f) = 0x80000000; cnsAddr = &f; } else { bitMask = &negBitmaskDbl; static_assert_no_msg(sizeof(double) == sizeof(__int64)); *((__int64*)&d) = 0x8000000000000000LL; cnsAddr = &d; dblAlign = true; } break; case GT_MATH: assert(treeNode->gtMath.gtMathFN == CORINFO_INTRINSIC_Abs); // Abs(x) = set sign-bit to zero // Abs(f) = f & 0x7fffffff // Abs(d) = d & 0x7fffffffffffffff ins = genGetInsForOper(GT_AND, targetType); if (targetType == TYP_FLOAT) { bitMask = &absBitmaskFlt; static_assert_no_msg(sizeof(float) == sizeof(int)); *((int *)&f) = 0x7fffffff; cnsAddr = &f; } else { bitMask = &absBitmaskDbl; static_assert_no_msg(sizeof(double) == sizeof(__int64)); *((__int64*)&d) = 0x7fffffffffffffffLL; cnsAddr = &d; dblAlign = true; } break; default: assert(!"genSSE2: unsupported oper"); unreached(); break; } if (*bitMask == nullptr) { assert(cnsAddr != nullptr); *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign); } // We need an additional register for bitmask. // Make sure we have one allocated. assert(treeNode->gtRsvdRegs != RBM_NONE); assert(genCountBits(treeNode->gtRsvdRegs) == 1); regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs); // Move operand into targetReg only if the reg reserved for // internal purpose is not the same as targetReg. 
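    // For instance, when tmpReg differs from targetReg, Abs(double) is expected to come out
    // roughly as:
    //     movaps targetReg, operandReg     ; only if the operand is not already in targetReg
    //     movsd  tmpReg, [absBitmaskDbl]   ; the 0x7fffffffffffffff constant materialized above
    //     andpd  targetReg, tmpReg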
GenTreePtr op1 = treeNode->gtOp.gtOp1; assert(!op1->isContained()); regNumber operandReg = genConsumeReg(op1); if (tmpReg != targetReg) { if (operandReg != targetReg) { inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType); } operandReg = tmpReg; } inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask); assert(ins != INS_invalid); inst_RV_RV(ins, targetReg, operandReg, targetType); } //--------------------------------------------------------------------- // genMathIntrinsic - generate code for a given math intrinsic // // Arguments // treeNode - the GT_MATH node // // Return value: // None // void CodeGen::genMathIntrinsic(GenTreePtr treeNode) { // Right now only Sqrt/Abs are treated as math intrinsics. switch(treeNode->gtMath.gtMathFN) { case CORINFO_INTRINSIC_Sqrt: noway_assert(treeNode->TypeGet() == TYP_DOUBLE); genConsumeOperands(treeNode->AsOp()); getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode, treeNode->gtOp.gtOp1); break; case CORINFO_INTRINSIC_Abs: genSSE2BitwiseOp(treeNode); break; default: assert(!"genMathIntrinsic: Unsupported math intrinsic"); unreached(); } genProduceReg(treeNode); } #ifdef _TARGET_X86_ void CodeGen::genPutArgStk(GenTreePtr treeNode) { assert(treeNode->OperGet() == GT_PUTARG_STK); var_types targetType = treeNode->TypeGet(); noway_assert(targetType != TYP_STRUCT); assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet())); GenTreePtr data = treeNode->gtOp.gtOp1; #if !defined(_TARGET_64BIT_) // On a 64-bit target, all of the long arguments have been decomposed into // a separate putarg_stk for each of the upper and lower halves. noway_assert(targetType != TYP_LONG); #endif // !defined(_TARGET_64BIT_) // Decrement SP. int argSize = genTypeSize(genActualType(targetType)); inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL)); genStackLevel += argSize; // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp? if (data->isContained()) { NYI_X86("Contained putarg_stk"); } else { genConsumeReg(data); getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0); } } #endif // _TARGET_X86_ /***************************************************************************** * * Create and record GC Info for the function. 
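 *
 *  On AMD64 the encoder-based path (genCreateAndStoreGCInfoX64) publishes the GC info via the
 *  encoder and returns nothing; the JIT32 encoder path returns a pointer to the info block.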
 */
#ifdef _TARGET_AMD64_
void
#else // !_TARGET_AMD64_
void*
#endif // !_TARGET_AMD64_
CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUG_ARG(void* codePtr))
{
#ifdef JIT32_GCENCODER
    return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUG_ARG(codePtr));
#else // !JIT32_GCENCODER
    genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUG_ARG(codePtr));
#endif // !JIT32_GCENCODER
}

#ifdef JIT32_GCENCODER
void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUG_ARG(void* codePtr))
{
    BYTE    headerBuf[64];
    InfoHdr header;

    int s_cached;
#ifdef DEBUG
    size_t headerSize =
#endif
    compiler->compInfoBlkSize = gcInfo.gcInfoBlockHdrSave(headerBuf,
                                                          0,
                                                          codeSize,
                                                          prologSize,
                                                          epilogSize,
                                                          &header,
                                                          &s_cached);

    size_t argTabOffset = 0;
    size_t ptrMapSize   = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);

#if DISPLAY_SIZES

    if (genInterruptible)
    {
        gcHeaderISize += compiler->compInfoBlkSize;
        gcPtrMapISize += ptrMapSize;
    }
    else
    {
        gcHeaderNSize += compiler->compInfoBlkSize;
        gcPtrMapNSize += ptrMapSize;
    }

#endif // DISPLAY_SIZES

    compiler->compInfoBlkSize += ptrMapSize;

    /* Allocate the info block for the method */

    compiler->compInfoBlkAddr = (BYTE *) compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);

#if 0 // VERBOSE_SIZES
    // TODO-X86-Cleanup: 'dataSize', below, is not defined
//  if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
    {
        printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
               compiler->info.compILCodeSize,
               compiler->compInfoBlkSize,
               codeSize + dataSize,
               codeSize + dataSize - prologSize - epilogSize,
               100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
               100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
               compiler->info.compClassName,
               compiler->info.compMethodName);
    }
#endif

    /* Fill in the info block and return it to the caller */

    void* infoPtr = compiler->compInfoBlkAddr;

    /* Create the method info block: header followed by GC tracking tables */

    compiler->compInfoBlkAddr += gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1,
                                                           codeSize,
                                                           prologSize,
                                                           epilogSize,
                                                           &header,
                                                           &s_cached);

    assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
    compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
    assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);

#ifdef DEBUG

    if (0)
    {
        BYTE*    temp = (BYTE *)infoPtr;
        unsigned size = compiler->compInfoBlkAddr - temp;
        BYTE*    ptab = temp + headerSize;

        noway_assert(size == headerSize + ptrMapSize);

        printf("Method info block - header [%u bytes]:", headerSize);

        for (unsigned i = 0; i < size; i++)
        {
            if (temp == ptab)
            {
                printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
                printf("\n    %04X: %*c", i & ~0xF, 3*(i&0xF), ' ');
            }
            else
            {
                if (!(i % 16))
                    printf("\n    %04X: ", i);
            }

            printf("%02X ", *temp++);
        }

        printf("\n");
    }

#endif // DEBUG

#if DUMP_GC_TABLES

    if (compiler->opts.dspGCtbls)
    {
        const BYTE* base = (BYTE *)infoPtr;
        unsigned    size;
        unsigned    methodSize;
        InfoHdr     dumpHeader;

        printf("GC Info for method %s\n", compiler->info.compFullName);
        printf("GC info size = %3u\n", compiler->compInfoBlkSize);

        size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
        // printf("size of header encoding is %3u\n", size);
        printf("\n");

        if (compiler->opts.dspGCtbls)
        {
            base += size;
            size  = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
            // printf("size of pointer table is %3u\n", size);
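            // 'base + size' should now point at the end of the block written above
            // (header followed by pointer table); the noway_assert below checks that
            // the dump walked exactly the bytes the encoder emitted.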
            printf("\n");

            noway_assert(compiler->compInfoBlkAddr == (base+size));
        }
    }

#ifdef DEBUG
    if (jitOpts.testMask & 128)
    {
        for (unsigned offs = 0; offs < codeSize; offs++)
        {
            gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
        }
    }
#endif // DEBUG
#endif // DUMP_GC_TABLES

    /* Make sure we ended up generating the expected number of bytes */

    noway_assert(compiler->compInfoBlkAddr == (BYTE *)infoPtr + compiler->compInfoBlkSize);

    return infoPtr;
}

#else // !JIT32_GCENCODER
void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUG_ARG(void* codePtr))
{
    IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
    GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc);
    assert(gcInfoEncoder);

    // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
    gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);

    // First we figure out the encoder ID's for the stack slots and registers.
    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
    // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
    gcInfoEncoder->FinalizeSlotIds();
    // Now we can actually use those slot ID's to declare live ranges.
    gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);

#if defined(DEBUGGING_SUPPORT)
    if (compiler->opts.compDbgEnC)
    {
        // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
        // which is:
        //  -return address
        //  -saved off RBP
        //  -saved 'this' pointer and bool for synchronized methods

        // 4 slots for RBP + return address + RSI + RDI
        int preservedAreaSize = 4 * REGSIZE_BYTES;

        if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
        {
            if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
                preservedAreaSize += REGSIZE_BYTES;

            // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
            preservedAreaSize += 4;
        }

        // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the frame
        gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
    }
#endif

    gcInfoEncoder->Build();

    // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
    // let's save the values anyway for debugging purposes
    compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
    compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
}
#endif // !JIT32_GCENCODER

/*****************************************************************************
 *  Emit a call to a helper function.
 *
 */

void CodeGen::genEmitHelperCall(unsigned helper,
                                int      argSize,
                                emitAttr retSize
#ifdef _TARGET_AMD64_
                                ,regNumber callTargetReg /*= REG_HELPER_CALL_TARGET*/
#endif // _TARGET_AMD64_
                                )
{
    void * addr = NULL, *pAddr = NULL;
#ifdef _TARGET_X86_
    regNumber callTargetReg = REG_EAX;
#endif // _TARGET_X86_
    emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
    addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
    regNumber callTarget = REG_NA;

    if (!addr)
    {
        assert(pAddr != nullptr);
        if (genAddrShouldUsePCRel((size_t)pAddr))
        {
            // generate call whose target is specified by PC-relative 32-bit offset.
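            // On AMD64 this ends up as an indirect call through the helper's
            // indirection cell, roughly:  call [rip + disp32]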
            callType = emitter::EC_FUNC_TOKEN_INDIR;
            addr = pAddr;
        }
        else
        {
            // If this address cannot be encoded as PC-relative 32-bit offset, load it into REG_HELPER_CALL_TARGET
            // and use register indirect addressing mode to make the call.
            //    mov   reg, addr
            //    call  [reg]
            callTarget = callTargetReg;
            CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL);
            callType = emitter::EC_INDIR_ARD;
        }
    }

    getEmitter()->emitIns_Call(callType,
                               compiler->eeFindHelper(helper),
                               INDEBUG_LDISASM_COMMA(nullptr)
                               addr,
                               argSize,
                               retSize,
                               gcInfo.gcVarPtrSetCur,
                               gcInfo.gcRegGCrefSetCur,
                               gcInfo.gcRegByrefSetCur,
                               BAD_IL_OFFSET,   /* IL offset */
                               callTarget,      /* ireg */
                               REG_NA, 0, 0,    /* xreg, xmul, disp */
                               false,           /* isJump */
                               emitter::emitNoGChelper(helper));

    regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
    regTracker.rsTrashRegSet(killMask);
    regTracker.rsTrashRegsForGCInterruptability();
}

#if !defined(_TARGET_64BIT_)
//-----------------------------------------------------------------------------
//
// Code Generation for Long integers
//
//-----------------------------------------------------------------------------

//------------------------------------------------------------------------
// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
//
// Arguments:
//    treeNode - A TYP_LONG lclVar node.
//
// Return Value:
//    None.
//
// Assumptions:
//    'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
//    Its operand must be a GT_LONG node.
//
void CodeGen::genStoreLongLclVar(GenTree* treeNode)
{
    emitter* emit = getEmitter();

    GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
    unsigned lclNum = lclNode->gtLclNum;
    LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
    assert(varDsc->TypeGet() == TYP_LONG);
    assert(!varDsc->lvPromoted);
    GenTreePtr op1 = treeNode->gtOp.gtOp1;
    noway_assert(op1->OperGet() == GT_LONG);
    genConsumeRegs(op1);

    // Definitions of register candidates will have been lowered to 2 int lclVars.
    assert(!treeNode->InReg());

    GenTreePtr loVal = op1->gtGetOp1();
    GenTreePtr hiVal = op1->gtGetOp2();
    // NYI: Contained immediates.
    NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
    emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
    emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
}
#endif // !defined(_TARGET_64BIT_)

/*****************************************************************************
 *  Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
 *  (it's as good a place as any), then use COMPLUS_JitLateDisasm=* to see if the late
 *  disassembler thinks the instructions are the same as we do.
 */

// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
//#define ALL_XARCH_EMITTER_UNIT_TESTS

#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
void CodeGen::genAmd64EmitterUnitTests()
{
    if (!verbose)
    {
        return;
    }

    if (!compiler->opts.altJit)
    {
        // No point doing this in a "real" JIT.
        return;
    }

    // Mark the "fake" instructions in the output.
    printf("*************** In genAmd64EmitterUnitTests()\n");

    // We use this:
    //      genDefineTempLabel(genCreateTempLabel());
    // to create artificial labels to help separate groups of tests.
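    //
    // A new test would typically follow the same shape as the entries below, e.g.
    // (hypothetical addition, not part of the current test set):
    //      // vxorps ymm0,ymm1,ymm2
    //      getEmitter()->emitIns_R_R_R(INS_xorps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);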
    //
    // Loads
    //

#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
#ifdef FEATURE_AVX_SUPPORT
    genDefineTempLabel(genCreateTempLabel());

    // vhaddpd ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vaddss xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vaddsd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vaddps xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vaddps ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vaddpd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vaddpd ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubss xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubsd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubps xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubps ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubpd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vsubpd ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vmulss xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vmulsd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vmulps xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vmulpd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vmulps ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vmulpd ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vandps xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vandpd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vandps ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vandpd ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vorps xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vorpd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vorps ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vorpd ymm0,ymm1,ymm2
    getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vdivss xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vdivsd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vdivss xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vdivsd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);

    // vcvtss2sd xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
    // vcvtsd2ss xmm0,xmm1,xmm2
    getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
#endif // FEATURE_AVX_SUPPORT
#endif // ALL_XARCH_EMITTER_UNIT_TESTS

    printf("*************** End of genAmd64EmitterUnitTests()\n");
}
#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)

/*****************************************************************************/
#ifdef DEBUGGING_SUPPORT
/*****************************************************************************
 *                          genSetScopeInfo
 *
 * Called for every scope info piece to record by the main genSetScopeInfo()
 */

void CodeGen::genSetScopeInfo(unsigned            which,
                              UNATIVE_OFFSET      startOffs,
                              UNATIVE_OFFSET      length,
                              unsigned            varNum,
                              unsigned            LVnum,
                              bool                avail,
                              Compiler::siVarLoc& varLoc)
{
    /* We need to do some mapping while reporting back these variables */

    unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
    noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);

    VarName name = nullptr;

#ifdef DEBUG

    for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
    {
        if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
        {
            name = compiler->info.compVarScopes[scopeNum].vsdName;
        }
    }

    // Hang on to this compiler->info.

    TrnslLocalVarInfo &tlvi = genTrnslLocalVarInfo[which];

    tlvi.tlviVarNum    = ilVarNum;
    tlvi.tlviLVnum     = LVnum;
    tlvi.tlviName      = name;
    tlvi.tlviStartPC   = startOffs;
    tlvi.tlviLength    = length;
    tlvi.tlviAvailable = avail;
    tlvi.tlviVarLoc    = varLoc;

#endif // DEBUG

    compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
}
#endif // DEBUGGING_SUPPORT

#endif // _TARGET_XARCH_

#endif // !LEGACY_BACKEND