diff options
Diffstat (limited to 'src/jit/stackfp.cpp')
-rw-r--r-- | src/jit/stackfp.cpp | 4494 |
1 file changed, 4494 insertions, 0 deletions
// diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
// new file mode 100644 index 0000000000..f975822740 --- /dev/null +++ b/src/jit/stackfp.cpp @@ -0,0 +1,4494 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.

#ifdef _TARGET_AMD64_
#error AMD64 must be !LEGACY_BACKEND
#endif

#include "compiler.h"
#include "emit.h"
#include "codegen.h"

// Instruction list
// N=normal, R=reverse, P=pop
#if FEATURE_STACK_FP_X87
const static instruction FPmathNN[] = {INS_fadd, INS_fsub, INS_fmul, INS_fdiv};
const static instruction FPmathNP[] = {INS_faddp, INS_fsubp, INS_fmulp, INS_fdivp};
const static instruction FPmathRN[] = {INS_fadd, INS_fsubr, INS_fmul, INS_fdivr};
const static instruction FPmathRP[] = {INS_faddp, INS_fsubrp, INS_fmulp, INS_fdivrp};

// Allocate a new flat x87 FP-stack state, optionally copy-initialized from
// pInitFrom (pass NULL to get an empty state).
FlatFPStateX87* CodeGenInterface::FlatFPAllocFPState(FlatFPStateX87* pInitFrom)
{
    FlatFPStateX87* pNewState;

    pNewState = new (compiler, CMK_FlatFPStateX87) FlatFPStateX87;
    pNewState->Init(pInitFrom);

    return pNewState;
}

// Returns true iff the set of virtual FP registers mapped in pState is
// exactly the set described by 'mask'.
bool CodeGen::FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask)
{
    int i;
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if (pState->Mapped(i))
        {
            regMaskTP regmask = genRegMaskFloat((regNumber)i);
            if ((mask & regmask) == 0)
            {
                // A register is mapped in the state but absent from the mask.
                return false;
            }

            mask &= ~regmask;
        }
    }

    // Any bits left over are in the mask but not mapped in the state.
    return mask ? false : true;
}

// True iff virtual register uEntry currently occupies a stack slot.
bool FlatFPStateX87::Mapped(unsigned uEntry)
{
    return m_uVirtualMap[uEntry] != (unsigned)FP_VRNOTMAPPED;
}

// Remove the virtual-register -> stack-slot mapping for uEntry.
void FlatFPStateX87::Unmap(unsigned uEntry)
{
    assert(Mapped(uEntry));
    m_uVirtualMap[uEntry] = (unsigned)FP_VRNOTMAPPED;
}

// Structural equality of two stack states: same depth and the same virtual
// register in every slot.
bool FlatFPStateX87::AreEqual(FlatFPStateX87* pA, FlatFPStateX87* pB)
{
    unsigned i;

    assert(pA->IsConsistent());
    assert(pB->IsConsistent());

    if (pA->m_uStackSize != pB->m_uStackSize)
    {
        return false;
    }

    for (i = 0; i < pA->m_uStackSize; i++)
    {
        if (pA->m_uStack[i] != pB->m_uStack[i])
        {
            return false;
        }
    }

    return true;
}

#ifdef DEBUG
// An entry is valid if it is either unmapped, or maps to a slot index that
// lies within the current stack depth.
bool FlatFPStateX87::IsValidEntry(unsigned uEntry)
{
    return (Mapped(uEntry) && (m_uVirtualMap[uEntry] >= 0 && m_uVirtualMap[uEntry] < m_uStackSize)) || !Mapped(uEntry);
}

// Verifies that the virtual-register map and the stack array are mutual
// inverses (each stack slot's register maps back to that slot).
bool FlatFPStateX87::IsConsistent()
{
    unsigned i;

    for (i = 0; i < FP_VIRTUALREGISTERS; i++)
    {
        if (!IsValidEntry(i))
        {
            if (m_bIgnoreConsistencyChecks)
            {
                return true;
            }
            else
            {
                assert(!"Virtual register is marked as mapped but out of the stack range");
                return false;
            }
        }
    }

    for (i = 0; i < m_uStackSize; i++)
    {
        if (m_uVirtualMap[m_uStack[i]] != i)
        {
            if (m_bIgnoreConsistencyChecks)
            {
                return true;
            }
            else
            {
                assert(!"Register File and stack layout don't match!");
                return false;
            }
        }
    }

    return true;
}

// Dump the current virtual stack layout (ST slot -> virtual register).
void FlatFPStateX87::Dump()
{
    unsigned i;

    assert(IsConsistent());

    if (m_uStackSize > 0)
    {
        printf("Virtual stack state: ");
        for (i = 0; i < m_uStackSize; i++)
        {
            printf("ST(%i): FPV%i | ", StackToST(i), m_uStack[i]);
        }
        printf("\n");
    }
}

// Rebuild m_uVirtualMap from m_uStack (debug-only helper).
void FlatFPStateX87::UpdateMappingFromStack()
{
    // -1 is FP_VRNOTMAPPED in every byte of the map.
    memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));

    unsigned i;

    for (i = 0; i < m_uStackSize; i++)
    {
        m_uVirtualMap[m_uStack[i]] = i;
    }
}

#endif

// Convert a bottom-based stack index into a top-based x87 ST(i) number.
unsigned FlatFPStateX87::StackToST(unsigned uEntry)
{
    assert(IsValidEntry(uEntry));
    return m_uStackSize - 1 - uEntry;
}
// ST(i) number of the slot currently holding virtual register uEntry.
unsigned FlatFPStateX87::VirtualToST(unsigned uEntry)
{
    assert(Mapped(uEntry));

    return StackToST(m_uVirtualMap[uEntry]);
}

// Virtual register currently held in ST(uST).
unsigned FlatFPStateX87::STToVirtual(unsigned uST)
{
    assert(uST < m_uStackSize);

    return m_uStack[m_uStackSize - 1 - uST];
}

// Initialize this state: either a bitwise copy of pFrom, or an empty stack.
void FlatFPStateX87::Init(FlatFPStateX87* pFrom)
{
    if (pFrom)
    {
        // NOTE(review): memcpy of *this assumes FlatFPStateX87 is trivially
        // copyable -- confirm against the class declaration.
        memcpy(this, pFrom, sizeof(*this));
    }
    else
    {
        // -1 == FP_VRNOTMAPPED in every map entry.
        memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));

#ifdef DEBUG
        memset(m_uStack, -1, sizeof(m_uStack));
#endif
        m_uStackSize = 0;
    }

#ifdef DEBUG
    m_bIgnoreConsistencyChecks = false;
#endif
}

// Establish the two-way mapping: virtual register uEntry <-> slot uStack.
void FlatFPStateX87::Associate(unsigned uEntry, unsigned uStack)
{
    assert(uStack < m_uStackSize);

    m_uStack[uStack] = uEntry;
    m_uVirtualMap[uEntry] = uStack;
}

// Index of the top-of-stack slot.
unsigned FlatFPStateX87::TopIndex()
{
    return m_uStackSize - 1;
}

// Virtual register at the top of the stack.
unsigned FlatFPStateX87::TopVirtual()
{
    assert(m_uStackSize > 0);
    return m_uStack[m_uStackSize - 1];
}

// Transfer uVirtualFrom's slot to uVirtualTo (which must be unmapped).
void FlatFPStateX87::Rename(unsigned uVirtualTo, unsigned uVirtualFrom)
{
    assert(!Mapped(uVirtualTo));

    unsigned uSlot = m_uVirtualMap[uVirtualFrom];

    Unmap(uVirtualFrom);
    Associate(uVirtualTo, uSlot);
}

// Push virtual register uEntry onto the virtual stack.
void FlatFPStateX87::Push(unsigned uEntry)
{
    // NOTE(review): '<=' allows m_uStackSize == FP_PHYSICREGISTERS before the
    // push, i.e. growing one past the physical x87 depth -- verify intended.
    assert(m_uStackSize <= FP_PHYSICREGISTERS);
    assert(!Mapped(uEntry));

    m_uStackSize++;
    Associate(uEntry, TopIndex());

    assert(IsConsistent());
}

// Pop the top slot and return the virtual register it held.
unsigned FlatFPStateX87::Pop()
{
    assert(m_uStackSize != 0);

    unsigned uVirtual = m_uStack[--m_uStackSize];

#ifdef DEBUG
    // Poison the vacated slot so stale reads are visible in dumps.
    m_uStack[m_uStackSize] = (unsigned)-1;
#endif

    Unmap(uVirtual);

    return uVirtual;
}

bool FlatFPStateX87::IsEmpty()
{
    return m_uStackSize == 0;
}

// Emit the fxch/unload sequence that morphs the live x87 stack from layout
// pSrc into layout pDst. pDst must hold a subset of pSrc's virtual registers;
// the permutation is applied cycle by cycle using swaps with the TOS.
void CodeGen::genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst)
{
    FlatFPStateX87  fpState;
    FlatFPStateX87* pTmp;
    int             i;

    // Make a temp copy
    memcpy(&fpState, pSrc, sizeof(FlatFPStateX87));
    pTmp = &fpState;

    // Make sure everything seems consistent.
    assert(pSrc->m_uStackSize >= pDst->m_uStackSize);
#ifdef DEBUG
    for (i = 0; i < FP_VIRTUALREGISTERS; i++)
    {
        if (!pTmp->Mapped(i) && pDst->Mapped(i))
        {
            assert(!"Dst stack state can't have a virtual register live if Src target has it dead");
        }
    }
#endif

    // First we need to get rid of the stuff that's dead in pDst
    for (i = 0; i < FP_VIRTUALREGISTERS; i++)
    {
        if (pTmp->Mapped(i) && !pDst->Mapped(i))
        {
            // We have to get rid of this one
            JITDUMP("Removing virtual register V%i from stack\n", i);

            // Don't need this virtual register any more
            FlatFPX87_Unload(pTmp, i);
        }
    }

    assert(pTmp->m_uStackSize == pDst->m_uStackSize);

    // Extract cycles of the permutation pTmp -> pDst.
    // iProcessed is a bit set of stack indices already handled.
    int iProcessed = 0;

    // We start with the top of the stack so that we can
    // easily recognize the cycle that contains it
    for (i = pTmp->m_uStackSize - 1; i >= 0; i--)
    {
        // Have we processed this stack element yet?
        if (((1 << i) & iProcessed) == 0)
        {
            // Extract cycle
            int iCycle[FP_VIRTUALREGISTERS];
            int iCycleLength = 0;
            int iCurrent     = i;
            int iTOS         = pTmp->m_uStackSize - 1;

            do
            {
                // Mark current stack element as processed
                iProcessed |= (1 << iCurrent);

                // Update cycle
                iCycle[iCycleLength++] = iCurrent;

                // Next element in cycle: where pDst wants this slot's register.
                iCurrent = pDst->m_uVirtualMap[pTmp->m_uStack[iCurrent]];

            } while ((iProcessed & (1 << iCurrent)) == 0);

#ifdef DEBUG
            if (verbose)
            {
                printf("Cycle: (");
                for (int l = 0; l < iCycleLength; l++)
                {
                    printf("%i", pTmp->StackToST(iCycle[l]));
                    if (l + 1 < iCycleLength)
                        printf(", ");
                }
                printf(")\n");
            }
#endif

            // Apply the cycle with swaps against the TOS.
            if (iCycleLength == 1)
            {
                // Stack element in the same place. Nothing to do
            }
            else
            {
                if (iCycle[0] == iTOS)
                {
                    // Cycle includes the top-of-stack element
                    int j;

                    for (j = 1; j < iCycleLength; j++)
                    {
                        FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
                    }
                }
                else
                {
                    // Cycle doesn't include the top-of-stack element: one
                    // extra swap is needed to restore the TOS at the end.
                    int j;

                    for (j = 0; j < iCycleLength; j++)
                    {
                        FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
                    }

                    FlatFPX87_SwapStack(pTmp, iCycle[0], iTOS);
                }
            }
        }
    }

    assert(FlatFPStateX87::AreEqual(pTmp, pDst));
}

// Unload (pop) every virtual register in pSrc that is not present in 'mask'.
// bEmitCode == false updates only the bookkeeping, emitting no instructions.
void CodeGen::genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode)
{
    unsigned i;
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if (pSrc->Mapped(i))
        {
            if ((mask & genRegMaskFloat((regNumber)i)) == 0)
            {
                FlatFPX87_Unload(pSrc, i, bEmitCode);
            }
        }
        else
        {
            assert((mask & genRegMaskFloat((regNumber)i)) == 0 &&
                   "A register marked as incoming live in the target block isnt live in the current block");
        }
    }
}

// Prolog: load the enregistered FP variables that are live on entry to the
// first block, so the physical x87 stack matches fgFirstBB's recorded layout.
void CodeGen::genCodeForPrologStackFP()
{
    assert(compiler->compGeneratingProlog);
    assert(compiler->fgFirstBB);

    FlatFPStateX87* pState = compiler->fgFirstBB->bbFPStateX87;

    if (pState && pState->m_uStackSize)
    {
        VARSET_TP VARSET_INIT_NOCOPY(liveEnregIn, VarSetOps::Intersection(compiler, compiler->fgFirstBB->bbLiveIn,
                                                                          compiler->optAllFPregVars));
        unsigned i;

#ifdef DEBUG
        unsigned uLoads = 0;
#endif

        assert(pState->m_uStackSize <= FP_VIRTUALREGISTERS);
        for (i = 0; i < pState->m_uStackSize; i++)
        {
            // Get the virtual register that matches (bottom-up order).
            unsigned iVirtual = pState->STToVirtual(pState->m_uStackSize - i - 1);

            unsigned   varNum;
            LclVarDsc* varDsc;

            for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
            {
                if (varDsc->IsFloatRegType() && varDsc->lvRegister && varDsc->lvRegNum == iVirtual)
                {
                    unsigned varIndex = varDsc->lvVarIndex;

                    // Is this variable live on entry?
                    if (VarSetOps::IsMember(compiler, liveEnregIn, varIndex))
                    {
                        if (varDsc->lvIsParam)
                        {
                            // Parameter: load its home slot onto the FP stack.
                            getEmitter()->emitIns_S(INS_fld, EmitSize(varDsc->TypeGet()), varNum, 0);
                        }
                        else
                        {
                            // unitialized regvar: push 0.0 as a placeholder.
                            getEmitter()->emitIns(INS_fldz);
                        }

#ifdef DEBUG
                        uLoads++;
#endif
                        break;
                    }
                }
            }

            assert(varNum != compiler->lvaCount); // We have to find the matching var!!!!
        }

        assert(uLoads == VarSetOps::Count(compiler, liveEnregIn));
    }
}

// Generate the FP-stack transition required at the end of 'block', depending
// on the kind of control flow leaving the block.
void CodeGen::genCodeForEndBlockTransitionStackFP(BasicBlock* block)
{
    switch (block->bbJumpKind)
    {
        case BBJ_EHFINALLYRET:
        case BBJ_EHFILTERRET:
        case BBJ_EHCATCHRET:
            // Nothing to do
            assert(compCurFPState.m_uStackSize == 0);
            break;
        case BBJ_THROW:
            break;
        case BBJ_RETURN:
            // Nothing to do; an FP return value (if any) is the single TOS entry.
            assert((varTypeIsFloating(compiler->info.compRetType) && compCurFPState.m_uStackSize == 1) ||
                   compCurFPState.m_uStackSize == 0);
            break;
        case BBJ_COND:
        case BBJ_NONE:
            genCodeForBBTransitionStackFP(block->bbNext);
            break;
        case BBJ_ALWAYS:
            genCodeForBBTransitionStackFP(block->bbJumpDest);
            break;
        case BBJ_LEAVE:
            assert(!"BBJ_LEAVE blocks shouldn't get here");
            break;
        case BBJ_CALLFINALLY:
            assert(compCurFPState.IsEmpty() && "we don't enregister variables live on entry to finallys");
            genCodeForBBTransitionStackFP(block->bbJumpDest);
            break;
        case BBJ_SWITCH:
            // Nothing to do here
            break;
        default:
            noway_assert(!"Unexpected bbJumpKind");
            break;
    }
}

// Build the FP register mask covering the enregistered FP locals that are
// members of 'varset'.
regMaskTP CodeGen::genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset)
{
    unsigned   varNum;
    LclVarDsc* varDsc;
    regMaskTP  result = 0;

    for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
    {
        if (varDsc->IsFloatRegType() && varDsc->lvRegister)
        {

            unsigned varIndex = varDsc->lvVarIndex;

            /* Is this variable live on entry? */

            if (VarSetOps::IsMember(compiler, varset, varIndex))
            {
                // We should only call this function doing a transition
                // To a block which hasn't state yet. All incoming live enregistered variables
                // should have been already initialized.
                assert(varDsc->lvRegNum != REG_FPNONE);

                result |= genRegMaskFloat(varDsc->lvRegNum);
            }
        }
    }

    return result;
}

// Emit the transition from the current FP state to the state expected on
// entry to pDst, creating pDst's state from the current one if absent.
void CodeGen::genCodeForBBTransitionStackFP(BasicBlock* pDst)
{
    assert(compCurFPState.IsConsistent());
    if (pDst->bbFPStateX87)
    {
        // Target block has an associated state. generate transition
        genCodeForTransitionStackFP(&compCurFPState, pDst->bbFPStateX87);
    }
    else
    {
        // Target block hasn't got an associated state. As it can only possibly
        // have a subset of the current state, we'll take advantage of this and
        // generate the optimal transition

        // Copy current state
        pDst->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);

        regMaskTP liveRegIn =
            genRegMaskFromLivenessStackFP(VarSetOps::Intersection(compiler, pDst->bbLiveIn, compiler->optAllFPregVars));

        // Match to live vars
        genCodeForTransitionFromMask(pDst->bbFPStateX87, liveRegIn);
    }
}

// Spill any FP-stack temps (slots not holding regvars) covered by canSpillMask.
void CodeGen::SpillTempsStackFP(regMaskTP canSpillMask)
{

    unsigned  i;
    regMaskTP spillMask = 0;
    regNumber reg;

    // First pass we determine which registers we spill
    for (i = 0; i < compCurFPState.m_uStackSize; i++)
    {
        reg               = (regNumber)compCurFPState.m_uStack[i];
        regMaskTP regMask = genRegMaskFloat(reg);
        if ((regMask & canSpillMask) && (regMask & regSet.rsMaskRegVarFloat) == 0)
        {
            spillMask |= regMask;
        }
    }

    // Second pass we do the actual spills
    for (i = REG_FPV0; i < REG_FPCOUNT; i++)
    {
        if ((genRegMaskFloat((regNumber)i) & spillMask))
        {
            JITDUMP("spilling temp in register %s\n", regVarNameStackFP((regNumber)i));
            SpillFloat((regNumber)i, true);
        }
    }
}

// Spills all the fp stack.
// We need this to spill
// across calls
void CodeGen::SpillForCallStackFP()
{
    unsigned i;
    unsigned uSize = compCurFPState.m_uStackSize;

    // SpillFloat pops one slot per call, so the TOS is re-read each iteration.
    for (i = 0; i < uSize; i++)
    {
        SpillFloat((regNumber)compCurFPState.m_uStack[compCurFPState.TopIndex()], true);
    }
}

// Spill the value in 'reg' (either a tree temp or an enregistered variable)
// to a spill temp, recording it on regSet.rsSpillFloat. Pops one x87 slot.
void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall)
{
#ifdef DEBUG
    regMaskTP mask = genRegMaskFloat(reg);

    // We can allow spilling regvars, but we don't need it at the moment, and we're
    // missing code in setupopforflatfp, so assert.
    assert(bIsCall || (mask & (regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat)) == 0);
#endif

    JITDUMP("SpillFloat spilling register %s\n", regVarNameStackFP(reg));

    // We take the virtual register to the top of the stack
    FlatFPX87_MoveToTOS(&compCurFPState, reg);

    // Allocate spill structure
    RegSet::SpillDsc* spill = RegSet::SpillDsc::alloc(compiler, &regSet, TYP_FLOAT);

    // Fill out spill structure
    var_types type;
    if (regSet.genUsedRegsFloat[reg])
    {
        JITDUMP("will spill tree [%08p]\n", dspPtr(regSet.genUsedRegsFloat[reg]));
        // register used for temp stack
        spill->spillTree             = regSet.genUsedRegsFloat[reg];
        spill->bEnregisteredVariable = false;

        regSet.genUsedRegsFloat[reg]->gtFlags |= GTF_SPILLED;

        type = genActualType(regSet.genUsedRegsFloat[reg]->TypeGet());

        // Clear used flag
        regSet.SetUsedRegFloat(regSet.genUsedRegsFloat[reg], false);
    }
    else
    {
        JITDUMP("will spill varDsc [%08p]\n", dspPtr(regSet.genRegVarsFloat[reg]));

        // enregistered variable
        spill->spillVarDsc = regSet.genRegVarsFloat[reg];
        assert(spill->spillVarDsc);

        spill->bEnregisteredVariable = true;

        // Mark as spilled
        spill->spillVarDsc->lvSpilled = true;
        type                          = genActualType(regSet.genRegVarsFloat[reg]->TypeGet());

        // Clear register flag
        SetRegVarFloat(reg, type, 0);
    }

    // Add to spill list
    spill->spillNext    = regSet.rsSpillFloat;
    regSet.rsSpillFloat = spill;

    // Obtain space
    TempDsc* temp = spill->spillTemp = compiler->tmpGetTemp(type);
    emitAttr size                    = EmitSize(type);

    // Store-and-pop to the spill temp; virtual state pops in step.
    getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
    compCurFPState.Pop();
}

void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
{
    // x87 stack allocation never needs the same-register reload flavor.
    NYI(!"Need not be implemented for x86.");
}

// Reload a previously spilled value (tree temp or regvar) back onto the stack.
void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
{
    // Do actual unspill
    if (spillDsc->bEnregisteredVariable)
    {
        assert(spillDsc->spillVarDsc->lvSpilled);

        // Do the logic as it was a regvar birth
        genRegVarBirthStackFP(spillDsc->spillVarDsc);

        // Mark as not spilled any more
        spillDsc->spillVarDsc->lvSpilled = false;

        // Update stack layout.
        compCurFPState.Push(spillDsc->spillVarDsc->lvRegNum);
    }
    else
    {
        assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);

        spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;

        regNumber reg = regSet.PickRegFloat();
        genMarkTreeInReg(spillDsc->spillTree, reg);
        regSet.SetUsedRegFloat(spillDsc->spillTree, true);

        compCurFPState.Push(reg);
    }

    // load from spilled spot
    emitAttr size = EmitSize(spillDsc->spillTemp->tdTempType());
    getEmitter()->emitIns_S(INS_fld, size, spillDsc->spillTemp->tdTempNum(), 0);
}

// unspills any reg var that we have in the spill list.
// We need this because we can't have any spilled vars across basic blocks
void CodeGen::UnspillRegVarsStackFp()
{
    RegSet::SpillDsc* cur;
    RegSet::SpillDsc* next;

    // 'next' is captured up front because UnspillFloat unlinks 'cur'.
    for (cur = regSet.rsSpillFloat; cur; cur = next)
    {
        next = cur->spillNext;

        if (cur->bEnregisteredVariable)
        {
            UnspillFloat(cur);
        }
    }
}

#ifdef DEBUG
const char* regNamesFP[] = {
#define REGDEF(name, rnum, mask, sname) sname,
#include "registerfp.h"
};

// static
const char* CodeGenInterface::regVarNameStackFP(regNumber reg)
{
    return regNamesFP[reg];
}

// Sanity checks run after generating each statement: state consistent and no
// leftover used/locked FP registers.
bool CodeGen::ConsistentAfterStatementStackFP()
{
    if (!compCurFPState.IsConsistent())
    {
        return false;
    }

    if (regSet.rsMaskUsedFloat != 0)
    {
        assert(!"FP register marked as used after statement");
        return false;
    }
    if (regSet.rsMaskLockedFloat != 0)
    {
        assert(!"FP register marked as locked after statement");
        return false;
    }
    if (genCountBits(regSet.rsMaskRegVarFloat) > compCurFPState.m_uStackSize)
    {
        assert(!"number of FP regvars in regSet.rsMaskRegVarFloat doesnt match current FP state");
        return false;
    }

    return true;
}

#endif

// Number of stack slots not holding enregistered variables (i.e. temps).
int CodeGen::genNumberTemps()
{
    return compCurFPState.m_uStackSize - genCountBits(regSet.rsMaskRegVarFloat);
}

// Pop and discard the FP value backing 'tree'.
void CodeGen::genDiscardStackFP(GenTreePtr tree)
{
    assert(tree->InReg());
    assert(varTypeIsFloating(tree));

    FlatFPX87_Unload(&compCurFPState, tree->gtRegNum, true);
}

// Rename srcReg to dstReg, carrying the used/locked masks and the used-tree
// bookkeeping along with the virtual register.
void CodeGen::genRegRenameWithMasks(regNumber dstReg, regNumber srcReg)
{
    regMaskTP dstregmask = genRegMaskFloat(dstReg);
    regMaskTP srcregmask = genRegMaskFloat(srcReg);

    // rename use register
    compCurFPState.Rename(dstReg, srcReg);

    regSet.rsMaskUsedFloat &= ~srcregmask;
    regSet.rsMaskUsedFloat |= dstregmask;

    if (srcregmask & regSet.rsMaskLockedFloat)
    {
        assert((dstregmask & regSet.rsMaskLockedFloat) == 0);
        // We will set the new one as locked
        regSet.rsMaskLockedFloat &= ~srcregmask;
        regSet.rsMaskLockedFloat |= dstregmask;
    }

    // Updated used tree
    assert(!regSet.genUsedRegsFloat[dstReg]);
    regSet.genUsedRegsFloat[dstReg]           = regSet.genUsedRegsFloat[srcReg];
    regSet.genUsedRegsFloat[dstReg]->gtRegNum = dstReg;
    regSet.genUsedRegsFloat[srcReg]           = NULL;
}

// Regvar birth: mark varDsc's register as holding the variable, first
// renaming away any temp that currently occupies that register.
void CodeGen::genRegVarBirthStackFP(LclVarDsc* varDsc)
{
    // Mark the virtual register we're assigning to this local;
    regNumber reg = varDsc->lvRegNum;

#ifdef DEBUG
    regMaskTP regmask = genRegMaskFloat(reg);
#endif

    assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
    if (regSet.genUsedRegsFloat[reg])
    {

        // Register was marked as used... will have to rename it so we can put the
        // regvar where it belongs.
        JITDUMP("Renaming used register %s\n", regVarNameStackFP(reg));

        regNumber newreg;

        newreg = regSet.PickRegFloat();

#ifdef DEBUG
        regMaskTP newregmask = genRegMaskFloat(newreg);
#endif

        // Update used mask
        assert((regSet.rsMaskUsedFloat & regmask) && (regSet.rsMaskUsedFloat & newregmask) == 0);

        genRegRenameWithMasks(newreg, reg);
    }

    // Mark the reg as holding a regvar
    varDsc->lvSpilled = false;
    SetRegVarFloat(reg, varDsc->TypeGet(), varDsc);
}

// Regvar birth driven by a GT_REG_VAR tree node.
void CodeGen::genRegVarBirthStackFP(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("variable V%i is going live in ", tree->gtLclVarCommon.gtLclNum);
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    // Update register in local var
    LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;

    genRegVarBirthStackFP(varDsc);
    assert(tree->gtRegNum == tree->gtRegVar.gtRegNum && tree->gtRegNum == varDsc->lvRegNum);
}

// Regvar death: release the register that held the variable.
void CodeGen::genRegVarDeathStackFP(LclVarDsc* varDsc)
{
    regNumber reg = varDsc->lvRegNum;

    assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
    SetRegVarFloat(reg, varDsc->TypeGet(), 0);
}

// Regvar death driven by a GT_REG_VAR tree node.
void CodeGen::genRegVarDeathStackFP(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("register %s is going dead in ", regVarNameStackFP(tree->gtRegVar.gtRegNum));
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
    genRegVarDeathStackFP(varDsc);
}

// Load the value of 'tree' onto the FP stack as virtual register 'reg'.
void CodeGen::genLoadStackFP(GenTreePtr tree, regNumber reg)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genLoadStackFP");
        Compiler::printTreeID(tree);
        printf(" %s\n", regVarNameStackFP(reg));
    }
#endif // DEBUG

    if (tree->IsRegVar())
    {
        // if it has been spilled, unspill it.
        LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
        if (varDsc->lvSpilled)
        {
            UnspillFloat(varDsc);
        }

        // if it's dying, just rename the register, else load it normally
        if (tree->IsRegVarDeath())
        {
            genRegVarDeathStackFP(tree);
            compCurFPState.Rename(reg, tree->gtRegVar.gtRegNum);
        }
        else
        {
            assert(tree->gtRegNum == tree->gtRegVar.gtRegNum);
            inst_FN(INS_fld, compCurFPState.VirtualToST(tree->gtRegVar.gtRegNum));
            FlatFPX87_PushVirtual(&compCurFPState, reg);
        }
    }
    else
    {
        FlatFPX87_PushVirtual(&compCurFPState, reg);
        inst_FS_TT(INS_fld, tree);
    }
}

// Move/copy an FP value between registers, regvars and memory. Either side
// may be REG_FPNONE, meaning it's a regvar or memory operand.
void CodeGen::genMovStackFP(GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
{
    if (dstreg == REG_FPNONE && !dst->IsRegVar())
    {
        regNumber reg;

        // reg to mem path
        if (srcreg == REG_FPNONE)
        {
            assert(src->IsRegVar());
            reg = src->gtRegNum;
        }
        else
        {
            reg = srcreg;
        }

        // Mov src to top of the stack
        FlatFPX87_MoveToTOS(&compCurFPState, reg);

        if (srcreg != REG_FPNONE || (src->IsRegVar() && src->IsRegVarDeath()))
        {
            // Source is disposable: store with pop.
            inst_FS_TT(INS_fstp, dst);

            // Update stack
            compCurFPState.Pop();
        }
        else
        {
            // Source stays live: store without pop.
            inst_FS_TT(INS_fst, dst);
        }
    }
    else
    {
        if (dstreg == REG_FPNONE)
        {
            assert(dst->IsRegVar());
            dstreg = dst->gtRegNum;
        }

        if (srcreg == REG_FPNONE && !src->IsRegVar())
        {
            // mem to reg
            assert(dst->IsRegVar() && dst->IsRegVarBirth());
            FlatFPX87_PushVirtual(&compCurFPState, dstreg);
            FlatFPX87_MoveToTOS(&compCurFPState, dstreg);

            if (src->gtOper == GT_CNS_DBL)
            {
                genConstantLoadStackFP(src);
            }
            else
            {
                inst_FS_TT(INS_fld, src);
            }
        }
        else
        {
            // disposable reg to reg, use renaming
            assert(dst->IsRegVar() && dst->IsRegVarBirth());
            assert(src->IsRegVar() || (src->InReg()));
            assert(src->gtRegNum != REG_FPNONE);

            if ((src->InReg()) || (src->IsRegVar() && src->IsRegVarDeath()))
            {
                // src is disposable and dst is a regvar, so we'll rename src to dst

                // SetupOp should have masked out the regvar
                assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
                       !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));

                // get slot that holds the value
                unsigned uStack = compCurFPState.m_uVirtualMap[src->gtRegNum];

                // unlink the slot that holds the value
                compCurFPState.Unmap(src->gtRegNum);

                regNumber tgtreg = dst->gtRegVar.gtRegNum;

                // State is transiently inconsistent while the slot is unmapped.
                compCurFPState.IgnoreConsistencyChecks(true);

                if (regSet.genUsedRegsFloat[tgtreg])
                {
                    // tgtreg is used, we move it to src reg. We do this here as src reg won't be
                    // marked as used, if tgtreg is used it srcreg will be a candidate for moving
                    // which is something we don't want, so we do the renaming here.
                    genRegRenameWithMasks(src->gtRegNum, tgtreg);
                }

                compCurFPState.IgnoreConsistencyChecks(false);

                // Birth of FP var
                genRegVarBirthStackFP(dst);

                // Associate target reg with source physical register
                compCurFPState.Associate(tgtreg, uStack);
            }
            else
            {
                if (src->IsRegVar())
                {
                    // regvar that isnt dying to regvar
                    assert(!src->IsRegVarDeath());

                    // Birth of FP var
                    genRegVarBirthStackFP(dst);

                    // Load register
                    inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegVar.gtRegNum));

                    // update our logic stack
                    FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
                }
                else
                {
                    // memory to regvar

                    // Birth of FP var
                    genRegVarBirthStackFP(dst);

                    // load into stack
                    inst_FS_TT(INS_fld, src);

                    // update our logic stack
                    FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
                }
            }
        }
    }
}

// Thin wrapper so the StackFP paths share the common tree-completion logic.
void CodeGen::genCodeForTreeStackFP_DONE(GenTreePtr tree, regNumber reg)
{
    return genCodeForTree_DONE(tree, reg);
}

// Does the setup of the FP stack on entry to block
void CodeGen::genSetupStateStackFP(BasicBlock* block)
{
    bool bGenerate = !block->bbFPStateX87;
    if (bGenerate)
    {
        // Allocate FP state
        block->bbFPStateX87 = FlatFPAllocFPState();
        block->bbFPStateX87->Init();
    }

    // Update liveset and lock enregistered live vars on entry
    VARSET_TP VARSET_INIT_NOCOPY(liveSet,
                                 VarSetOps::Intersection(compiler, block->bbLiveIn, compiler->optAllFPregVars));

    if (!VarSetOps::IsEmpty(compiler, liveSet))
    {
        unsigned   varNum;
        LclVarDsc* varDsc;

        for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
        {
            if (varDsc->IsFloatRegType() && varDsc->lvRegister)
            {

                unsigned varIndex = varDsc->lvVarIndex;

                // Is this variable live on entry?
                if (VarSetOps::IsMember(compiler, liveSet, varIndex))
                {
                    JITDUMP("genSetupStateStackFP(): enregistered variable V%i is live on entry to block\n", varNum);

                    assert(varDsc->lvTracked);
                    assert(varDsc->lvRegNum != REG_FPNONE);

                    genRegVarBirthStackFP(varDsc);

                    if (bGenerate)
                    {
                        // If we're generating layout, update it.
                        block->bbFPStateX87->Push(varDsc->lvRegNum);
                    }
                }
            }
        }
    }

    // Adopt the block's layout as the current working state.
    compCurFPState.Init(block->bbFPStateX87);

    assert(block->bbFPStateX87->IsConsistent());
}

// Push the FP argument 'args' onto the outgoing x86 argument stack.
// Returns the set of address registers consumed while forming the operand.
regMaskTP CodeGen::genPushArgumentStackFP(GenTreePtr args)
{
    regMaskTP addrReg = 0;
    unsigned  opsz    = genTypeSize(genActualType(args->TypeGet()));

    switch (args->gtOper)
    {
        GenTreePtr temp;
        GenTreePtr fval;
        size_t     flopsz;

        case GT_CNS_DBL:
        {
            float f    = 0.0;
            int*  addr = NULL;
            if (args->TypeGet() == TYP_FLOAT)
            {
                f = (float)args->gtDblCon.gtDconVal;
                // *(long*) (&f) used instead of *addr because of of strict
                // pointer aliasing optimization. According to the ISO C/C++
                // standard, an optimizer can assume two pointers of
                // non-compatible types do not point to the same memory.
                inst_IV(INS_push, *((int*)(&f)));
                genSinglePush();
                addrReg = 0;
            }
            else
            {
                addr = (int*)&args->gtDblCon.gtDconVal;

                // store forwarding fix for pentium 4 and Centrino
                // (even for down level CPUs as we don't care about their perf any more)
                fval = genMakeConst(&args->gtDblCon.gtDconVal, args->gtType, args, true);
                inst_FS_TT(INS_fld, fval);
                flopsz = (size_t)8;
                inst_RV_IV(INS_sub, REG_ESP, flopsz, EA_PTRSIZE);
                getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
                genSinglePush();
                genSinglePush();

                addrReg = 0;
            }

            break;
        }

        case GT_CAST:
        {
            // Is the value a cast from double ?
            if ((args->gtOper == GT_CAST) && (args->CastFromType() == TYP_DOUBLE))
            {
                /* Load the value onto the FP stack */

                genCodeForTreeFlt(args->gtCast.CastOp(), false);

                /* Go push the value as a float/double */
                args = args->gtCast.CastOp();

                addrReg = 0;
                goto PUSH_FLT;
            }
            // Fall through to default case....
        }
        default:
        {
            temp = genMakeAddrOrFPstk(args, &addrReg, false);
            if (temp)
            {
                unsigned offs;

                // We have the address of the float operand, push its bytes
                offs = opsz;
                assert(offs % sizeof(int) == 0);

                if (offs == 4)
                {
                    assert(args->gtType == temp->gtType);
                    do
                    {
                        offs -= sizeof(int);
                        inst_TT(INS_push, temp, offs);
                        genSinglePush();
                    } while (offs);
                }
                else
                {
                    // store forwarding fix for pentium 4 and Centrino
                    inst_FS_TT(INS_fld, temp);
                    flopsz = (size_t)offs;
                    inst_RV_IV(INS_sub, REG_ESP, (size_t)flopsz, EA_PTRSIZE);
                    getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
                    genSinglePush();
                    genSinglePush();
                }
            }
            else
            {
                // The argument is on the FP stack -- pop it into [ESP-4/8]

            PUSH_FLT:

                inst_RV_IV(INS_sub, REG_ESP, opsz, EA_PTRSIZE);

                genSinglePush();
                if (opsz == 2 * sizeof(unsigned))
                    genSinglePush();

                // Take reg to top of stack
                FlatFPX87_MoveToTOS(&compCurFPState, args->gtRegNum);

                // Pop it off to stack
                compCurFPState.Pop();
                getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(opsz), REG_NA, REG_ESP, 0);
            }

            gcInfo.gcMarkRegSetNpt(addrReg);
            break;
        }
    }

    return addrReg;
}

// Round the FP value of 'op' to its natural precision by a store/reload
// round trip through a memory temp (x87 registers hold 80-bit values).
void CodeGen::genRoundFpExpressionStackFP(GenTreePtr op, var_types type)
{
    // Do nothing with memory resident opcodes - these are the right precision
    // (even if genMakeAddrOrFPstk loads them to the FP stack)
    if (type == TYP_UNDEF)
        type = op->TypeGet();

    switch (op->gtOper)
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        case GT_CLS_VAR:
        case GT_CNS_DBL:
        case GT_IND:
        case GT_LEA:
            if (type == op->TypeGet())
                return;
        default:
            break;
    }

    assert(op->gtRegNum != REG_FPNONE);

    // Take register to top of stack
    FlatFPX87_MoveToTOS(&compCurFPState, op->gtRegNum);

    // Allocate a temp for the expression
    TempDsc* temp = compiler->tmpGetTemp(type);

    // Store the FP value into the temp
    inst_FS_ST(INS_fstp, EmitSize(type), temp, 0);

    // Load the value back onto the FP stack
    inst_FS_ST(INS_fld, EmitSize(type), temp, 0);

    // We no longer need the temp
    compiler->tmpRlsTemp(temp);
}

// Generate code for a GT_CNS_DBL constant node.
void CodeGen::genCodeForTreeStackFP_Const(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Const() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

#ifdef DEBUG
    if (tree->OperGet() != GT_CNS_DBL)
    {
        compiler->gtDispTree(tree);
        assert(!"bogus float const");
    }
#endif
    // Pick register
    regNumber reg = regSet.PickRegFloat();

    // Load constant
    genConstantLoadStackFP(tree);

    // Push register to virtual stack
    FlatFPX87_PushVirtual(&compCurFPState, reg);

    // Update tree
    genCodeForTreeStackFP_DONE(tree, reg);
}

// Generate code for an FP leaf node (local var, local field, reg var, class var).
void CodeGen::genCodeForTreeStackFP_Leaf(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Leaf() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    switch (tree->OperGet())
    {
        case GT_LCL_VAR:
        case GT_LCL_FLD:
        {
            assert(!compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvRegister);

            // Pick register
            regNumber reg = regSet.PickRegFloat();

            // Load it
            genLoadStackFP(tree, reg);

            genCodeForTreeStackFP_DONE(tree, reg);

            break;
        }

        case GT_REG_VAR:
        {
            regNumber reg = regSet.PickRegFloat();

            genLoadStackFP(tree, reg);

            genCodeForTreeStackFP_DONE(tree, reg);

            break;
        }

        case GT_CLS_VAR:
        {
            // Pick register
            regNumber reg = regSet.PickRegFloat();

            // Load it
            genLoadStackFP(tree, reg);

            genCodeForTreeStackFP_DONE(tree, reg);

            break;
        }

        default:
#ifdef DEBUG
compiler->gtDispTree(tree); +#endif + assert(!"unexpected leaf"); + } + + genUpdateLife(tree); +} + +void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree) +{ +#ifdef DEBUG + if (compiler->verbose) + { + printf("genCodeForTreeStackFP_Asg() "); + Compiler::printTreeID(tree); + printf("\n"); + } +#endif // DEBUG + + emitAttr size; + unsigned offs; + GenTreePtr op1 = tree->gtOp.gtOp1; + GenTreePtr op2 = tree->gtGetOp2(); + + assert(tree->OperGet() == GT_ASG); + + if (!op1->IsRegVar() && (op2->gtOper == GT_CAST) && (op1->gtType == op2->gtType) && + varTypeIsFloating(op2->gtCast.CastOp())) + { + /* We can discard the cast */ + op2 = op2->gtCast.CastOp(); + } + + size = EmitSize(op1); + offs = 0; + + // If lhs is a comma expression, evaluate the non-last parts, make op1 be the remainder. + // (But can't do this if the assignment is reversed...) + if ((tree->gtFlags & GTF_REVERSE_OPS) == 0) + { + op1 = genCodeForCommaTree(op1); + } + + GenTreePtr op1NonCom = op1->gtEffectiveVal(); + if (op1NonCom->gtOper == GT_LCL_VAR) + { +#ifdef DEBUG + LclVarDsc* varDsc = &compiler->lvaTable[op1NonCom->gtLclVarCommon.gtLclNum]; + // No dead stores + assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH)); +#endif + +#ifdef DEBUGGING_SUPPORT + + /* For non-debuggable code, every definition of a lcl-var has + * to be checked to see if we need to open a new scope for it. + */ + + if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0)) + { + siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs); + } +#endif + } + + assert(op2); + switch (op2->gtOper) + { + case GT_CNS_DBL: + + assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS); + + regMaskTP addrRegInt; + addrRegInt = 0; + regMaskTP addrRegFlt; + addrRegFlt = 0; + + // op2 is already "evaluated," so doesn't matter if they're reversed or not... 
            op1 = genCodeForCommaTree(op1);
            op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);

            // We want to 'cast' the constant to op1's type
            double constantValue;
            constantValue = op2->gtDblCon.gtDconVal;
            if (op1->gtType == TYP_FLOAT)
            {
                float temp    = forceCastToFloat(constantValue);
                constantValue = (double)temp;
            }

            GenTreePtr constantTree;
            constantTree = compiler->gtNewDconNode(constantValue);
            // genConstantLoadStackFP(..., true) only succeeds for constants with a
            // dedicated x87 load instruction (e.g. fldz/fld1); otherwise fall through.
            if (genConstantLoadStackFP(constantTree, true))
            {
                if (op1->IsRegVar())
                {
                    // regvar birth
                    genRegVarBirthStackFP(op1);

                    // Update
                    compCurFPState.Push(op1->gtRegNum);
                }
                else
                {
                    // store in target
                    inst_FS_TT(INS_fstp, op1);
                }
            }
            else
            {
                // Standard constant
                if (op1->IsRegVar())
                {
                    // Load constant to fp stack.

                    GenTreePtr cnsaddr;

                    // Create slot for constant
                    if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
                    {
                        // We're going to use that double as a float, so recompute addr
                        float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
                        cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
                    }
                    else
                    {
                        cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
                    }

                    // Load into stack
                    inst_FS_TT(INS_fld, cnsaddr);

                    // regvar birth
                    genRegVarBirthStackFP(op1);

                    // Update
                    compCurFPState.Push(op1->gtRegNum);
                }
                else
                {
                    if (size == 4)
                    {
                        // 4-byte destination: store the float bits with integer moves,
                        // bypassing the FP stack entirely.
                        float f    = forceCastToFloat(op2->gtDblCon.gtDconVal);
                        int*  addr = (int*)&f;

                        do
                        {
                            inst_TT_IV(INS_mov, op1, *addr++, offs);
                            offs += sizeof(int);
                        } while (offs < size);
                    }
                    else
                    {
                        // store forwarding fix for pentium 4 and centrino and also
                        // fld for doubles that can be represented as floats, saving
                        // 4 bytes of load
                        GenTreePtr cnsaddr;

                        // Create slot for constant
                        if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
                        {
                            // We're going to use that double as a float, so recompute addr
                            float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
                            cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
                        }
                        else
                        {
                            assert(tree->gtType == TYP_DOUBLE);
                            cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
                        }

                        inst_FS_TT(INS_fld, cnsaddr);
                        inst_FS_TT(INS_fstp, op1);
                    }
                }
            }

            genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
            genUpdateLife(op1);
            return;

        default:
            break;
    }

    // Not one of the easy optimizations. Proceed normally
    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        /* Evaluate the RHS onto the FP stack.
           We don't need to round it as we will be doing a spill for
           the assignment anyway (unless op1 is a GT_REG_VAR). */

        genSetupForOpStackFP(op1, op2, true, true, false, true);

        // Do the move
        genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
    }
    else
    {
        // Have to evaluate left side before

        // This should never happen
        assert(!op1->IsRegVar());

        genSetupForOpStackFP(op1, op2, false, true, false, true);

        // Do the actual move
        genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ?
op2->gtRegNum : REG_FPNONE);
    }
}

// Prepare the two operands of an FP operation: one side is made addressable
// (when bMakeOp1Addressable) while the other is evaluated onto the x87 stack.
// bOp1ReadOnly/bOp2ReadOnly allow a live regvar operand to be used in place
// without loading a copy. op1/op2 are by-reference: comma trees and
// addressability processing may replace them.
void CodeGen::genSetupForOpStackFP(
    GenTreePtr& op1, GenTreePtr& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly)
{
    if (bMakeOp1Addressable)
    {
        if (bReverse)
        {
            // Swap roles and recurse with the operands (and their read-only flags) exchanged
            genSetupForOpStackFP(op2, op1, false, false, bOp2ReadOnly, bOp1ReadOnly);
        }
        else
        {
            regMaskTP addrRegInt = 0;
            regMaskTP addrRegFlt = 0;

            op1 = genCodeForCommaTree(op1);

            // Evaluate RHS on FP stack
            if (bOp2ReadOnly && op2->IsRegVar() && !op2->IsRegVarDeath())
            {
                // read only and not dying, so just make addressable
                op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
                genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
                genUpdateLife(op2);
            }
            else
            {
                // Make target addressable
                op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);

                op2 = genCodeForCommaTree(op2);

                genCodeForTreeFloat(op2);

                regSet.SetUsedRegFloat(op2, true);
                regSet.SetLockedRegFloat(op2, true);

                // Make sure target is still addressable
                genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);

                regSet.SetLockedRegFloat(op2, false);
                regSet.SetUsedRegFloat(op2, false);
            }

            /* Free up anything that was tied up by the target address */
            genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
        }
    }
    else
    {
        assert(!bReverse ||
               !"Can't do this. if op2 is a reg var and dies in op1, we have a serious problem. For the "
                "moment, handle this in the caller");

        regMaskTP addrRegInt = 0;
        regMaskTP addrRegFlt = 0;

        op1 = genCodeForCommaTree(op1);

        if (bOp1ReadOnly && op1->IsRegVar() && !op1->IsRegVarDeath() &&
            !genRegVarDiesInSubTree(op2, op1->gtRegVar.gtRegNum)) // regvar can't die in op2 either
        {
            // First update liveness for op1, since we're "evaluating" it here
            genUpdateLife(op1);

            op2 = genCodeForCommaTree(op2);

            // read only and not dying, we don't have to do anything.
            op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
            genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
        }
        else
        {
            genCodeForTreeFloat(op1);

            regSet.SetUsedRegFloat(op1, true);

            op2 = genCodeForCommaTree(op2);

            op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);

            // Restore op1 if necessary
            if (op1->gtFlags & GTF_SPILLED)
            {
                UnspillFloat(op1);
            }

            // Lock op1
            regSet.SetLockedRegFloat(op1, true);

            genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);

            // unlock op1
            regSet.SetLockedRegFloat(op1, false);

            // mark as free
            regSet.SetUsedRegFloat(op1, false);
        }

        genDoneAddressableStackFP(op2, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
    }
}

// Generate code for FP GT_ADD/GT_SUB/GT_MUL/GT_DIV, with fast paths for a
// dying regvar operand and for identical operands.
void CodeGen::genCodeForTreeStackFP_Arithm(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Arithm() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    assert(tree->OperGet() == GT_ADD || tree->OperGet() == GT_SUB || tree->OperGet() == GT_MUL ||
           tree->OperGet() == GT_DIV);

    // We handle the reverse here instead of leaving setupop to do it. As for this case
    //
    //              + with reverse
    //          op1    regvar
    //
    // and in regvar dies in op1, we would need a load of regvar, instead of a noop. So we handle this
    // here and tell genArithmStackFP to do the reverse operation
    bool bReverse;

    GenTreePtr op1, op2;

    if (tree->gtFlags & GTF_REVERSE_OPS)
    {
        bReverse = true;
        op1      = tree->gtGetOp2();
        op2      = tree->gtOp.gtOp1;
    }
    else
    {
        bReverse = false;
        op1      = tree->gtOp.gtOp1;
        op2      = tree->gtGetOp2();
    }

    regNumber result;

    // Fast paths
    genTreeOps oper = tree->OperGet();
    if (op1->IsRegVar() && op2->IsRegVar() && !op1->IsRegVarDeath() && op2->IsRegVarDeath())
    {
        // In this fastpath, we will save a load by doing the operation directly on the op2
        // register, as it's dying.
        // Mark op2 as dead
        genRegVarDeathStackFP(op2);

        // Do operation (operands swapped, hence !bReverse)
        result = genArithmStackFP(oper, op2, op2->gtRegVar.gtRegNum, op1, REG_FPNONE, !bReverse);

        genUpdateLife(op1);
        genUpdateLife(op2);
    }
    else if (!op1->IsRegVar() &&                         // We don't do this for regvars, as we'll need a scratch reg
             ((tree->gtFlags & GTF_SIDE_EFFECT) == 0) && // No side effects
             GenTree::Compare(op1, op2))                 // op1 and op2 are the same
    {
        // op1 is same thing as op2. Ideal for CSEs that weren't optimized
        // due to their low cost.

        // First we need to update lifetimes from op1
        VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, genUpdateLiveSetForward(op1));
        compiler->compCurLifeTree = op1;

        genCodeForTreeFloat(op2);

        result = genArithmStackFP(oper, op2, op2->gtRegNum, op2, op2->gtRegNum, bReverse);
    }
    else
    {
        genSetupForOpStackFP(op1, op2, false, false, false, true);

        result = genArithmStackFP(oper, op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
                                  (op2->InReg()) ? op2->gtRegNum : REG_FPNONE, bReverse);
    }

    genCodeForTreeStackFP_DONE(tree, result);
}

// Emit the x87 instruction(s) for a binary FP operation. dst/dstreg receive
// the result; src may be in a virtual FP register, a live regvar, or memory.
// bReverse selects the reversed instruction forms (fsubr/fdivr families).
// Returns the virtual register holding the result.
regNumber CodeGen::genArithmStackFP(
    genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genArithmStackFP() dst: ");
        Compiler::printTreeID(dst);
        printf(" src: ");
        Compiler::printTreeID(src);
        printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
               srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
    }
#endif // DEBUG

    // Select instruction depending on oper and bReverseOp
    // NN = normal/no-pop, NP = normal/pop, RN = reversed/no-pop, RP = reversed/pop

    instruction ins_NN;
    instruction ins_RN;
    instruction ins_RP;
    instruction ins_NP;

    switch (oper)
    {
        default:
            assert(!"Unexpected oper");
            // deliberate fall-through into the table lookup below
        case GT_ADD:
        case GT_SUB:
        case GT_MUL:
        case GT_DIV:

            /* Make sure the instruction tables look correctly ordered */
            assert(FPmathNN[GT_ADD - GT_ADD] == INS_fadd);
            assert(FPmathNN[GT_SUB - GT_ADD] == INS_fsub);
            assert(FPmathNN[GT_MUL - GT_ADD] == INS_fmul);
            assert(FPmathNN[GT_DIV - GT_ADD] == INS_fdiv);

            assert(FPmathNP[GT_ADD - GT_ADD] == INS_faddp);
            assert(FPmathNP[GT_SUB - GT_ADD] == INS_fsubp);
            assert(FPmathNP[GT_MUL - GT_ADD] == INS_fmulp);
            assert(FPmathNP[GT_DIV - GT_ADD] == INS_fdivp);

            assert(FPmathRN[GT_ADD - GT_ADD] == INS_fadd);
            assert(FPmathRN[GT_SUB - GT_ADD] == INS_fsubr);
            assert(FPmathRN[GT_MUL - GT_ADD] == INS_fmul);
            assert(FPmathRN[GT_DIV - GT_ADD] == INS_fdivr);

            assert(FPmathRP[GT_ADD - GT_ADD] == INS_faddp);
            assert(FPmathRP[GT_SUB - GT_ADD] == INS_fsubrp);
            assert(FPmathRP[GT_MUL - GT_ADD] == INS_fmulp);
            assert(FPmathRP[GT_DIV - GT_ADD] == INS_fdivrp);

            if (bReverse)
            {
                // Swap normal and reversed instruction families
                ins_NN = FPmathRN[oper - GT_ADD];
                ins_NP = FPmathRP[oper - GT_ADD];
                ins_RN = FPmathNN[oper - GT_ADD];
                ins_RP = FPmathNP[oper - GT_ADD];
            }
            else
            {
                ins_NN = FPmathNN[oper - GT_ADD];
                ins_NP = FPmathNP[oper - GT_ADD];
                ins_RN = FPmathRN[oper - GT_ADD];
                ins_RP = FPmathRP[oper - GT_ADD];
            }
    }

    regNumber result = REG_FPNONE;

    if (dstreg != REG_FPNONE)
    {
        if (srcreg == REG_FPNONE)
        {
            if (src->IsRegVar())
            {
                if (src->IsRegVarDeath())
                {
                    if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
                    {
                        // Do operation and store in srcreg
                        inst_FS(ins_RP, compCurFPState.VirtualToST(src->gtRegNum));

                        // kill current dst and rename src as dst.
                        FlatFPX87_Kill(&compCurFPState, dstreg);
                        compCurFPState.Rename(dstreg, src->gtRegNum);
                    }
                    else
                    {
                        // Take src to top of stack
                        FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                        // do reverse and pop operation
                        inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));

                        // Kill the register
                        FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
                    }

                    assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
                           !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
                }
                else
                {
                    // src is a live regvar: must not pop it
                    if (compCurFPState.TopVirtual() == (unsigned)src->gtRegNum)
                    {
                        inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
                    }
                    else
                    {
                        FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
                        inst_FN(ins_NN, compCurFPState.VirtualToST(src->gtRegNum));
                    }
                }
            }
            else
            {
                // do operation with memory and store in dest
                FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
                inst_FS_TT(ins_NN, src);
            }
        }
        else
        {
            if (dstreg == srcreg)
            {
                FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
                inst_FN(ins_NN, compCurFPState.VirtualToST(dstreg));
            }
            else
            {
                if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
                {
                    // Do operation and store in srcreg
                    inst_FS(ins_RP, compCurFPState.VirtualToST(srcreg));

                    // kill current dst and rename src as dst.
                    FlatFPX87_Kill(&compCurFPState, dstreg);
                    compCurFPState.Rename(dstreg, srcreg);
                }
                else
                {
                    FlatFPX87_MoveToTOS(&compCurFPState, srcreg);

                    // do reverse and pop operation
                    inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));

                    // Kill the register
                    FlatFPX87_Kill(&compCurFPState, srcreg);
                }
            }
        }

        result = dstreg;
    }
    else
    {
        assert(!"if we get here it means we didnt load op1 into a temp. Investigate why");
    }

    assert(result != REG_FPNONE);
    return result;
}

// Generate code for FP GT_ASG_ADD/SUB/MUL/DIV nodes.
void CodeGen::genCodeForTreeStackFP_AsgArithm(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_AsgArithm() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    assert(tree->OperGet() == GT_ASG_ADD || tree->OperGet() == GT_ASG_SUB || tree->OperGet() == GT_ASG_MUL ||
           tree->OperGet() == GT_ASG_DIV);

    GenTreePtr op1, op2;

    op1 = tree->gtOp.gtOp1;
    op2 = tree->gtGetOp2();

    genSetupForOpStackFP(op1, op2, (tree->gtFlags & GTF_REVERSE_OPS) ? true : false, true, false, true);

    regNumber result = genAsgArithmStackFP(tree->OperGet(), op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
                                           (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);

    genCodeForTreeStackFP_DONE(tree, result);
}

// Emit x87 code for an assign-arithmetic op. dst is a regvar or memory
// (dstreg must be REG_FPNONE); the result is stored back into dst.
regNumber CodeGen::genAsgArithmStackFP(
    genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
{
    regNumber result = REG_FPNONE;

#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genAsgArithmStackFP() dst: ");
        Compiler::printTreeID(dst);
        printf(" src: ");
        Compiler::printTreeID(src);
        printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
               srcreg == REG_FPNONE ?
"NONE" : regVarNameStackFP(srcreg));
    }
#endif // DEBUG

    instruction ins_NN;
    instruction ins_RN;
    instruction ins_RP;
    instruction ins_NP;

    switch (oper)
    {
        default:
            assert(!"Unexpected oper");
            break;
        case GT_ASG_ADD:
        case GT_ASG_SUB:
        case GT_ASG_MUL:
        case GT_ASG_DIV:

            assert(FPmathRN[GT_ASG_ADD - GT_ASG_ADD] == INS_fadd);
            assert(FPmathRN[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubr);
            assert(FPmathRN[GT_ASG_MUL - GT_ASG_ADD] == INS_fmul);
            assert(FPmathRN[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivr);

            assert(FPmathRP[GT_ASG_ADD - GT_ASG_ADD] == INS_faddp);
            assert(FPmathRP[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubrp);
            assert(FPmathRP[GT_ASG_MUL - GT_ASG_ADD] == INS_fmulp);
            assert(FPmathRP[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivrp);

            ins_NN = FPmathNN[oper - GT_ASG_ADD];
            ins_NP = FPmathNP[oper - GT_ASG_ADD];

            ins_RN = FPmathRN[oper - GT_ASG_ADD];
            ins_RP = FPmathRP[oper - GT_ASG_ADD];

            if (dstreg != REG_FPNONE)
            {
                assert(!"dst should be a regvar or memory");
            }
            else
            {
                if (dst->IsRegVar())
                {
                    if (src->IsRegVar())
                    {
                        if (src->IsRegVarDeath())
                        {
                            // Take src to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                            // Do op
                            inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));

                            // Kill the register
                            FlatFPX87_Kill(&compCurFPState, src->gtRegNum);

                            // SetupOp should mark the regvar as dead
                            assert((genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
                        }
                        else
                        {
                            assert(src->gtRegNum == src->gtRegVar.gtRegNum &&
                                   "We shoudnt be loading regvar src on the stack as src is readonly");

                            // Take src to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                            // Do op
                            inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
                        }
                    }
                    else
                    {
                        if (srcreg == REG_FPNONE)
                        {
                            // take enregistered variable to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);

                            // Do operation with mem
                            inst_FS_TT(ins_NN, src);
                        }
                        else
                        {
                            // take enregistered variable to top of stack
                            FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);

                            // do op
                            inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));

                            // Kill the register
                            FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
                        }
                    }
                }
                else
                {
                    // To memory
                    if ((src->IsRegVar()) && !src->IsRegVarDeath())
                    {
                        // We set src as read only, but as dst is in memory, we will need
                        // an extra physical register (which we should have, as we have a
                        // spare one for transitions).
                        //
                        // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
                        // here, but there's actually no reason to assume that. AFAICT, for FP vars under stack FP,
                        // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
                        // FP stack position into which that is loaded to represent a particular use of the variable.
                        inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegNum));

                        // Do operation with mem
                        inst_FS_TT(ins_RN, dst);

                        // store back
                        inst_FS_TT(INS_fstp, dst);
                    }
                    else
                    {
                        // put src in top of stack
                        FlatFPX87_MoveToTOS(&compCurFPState, srcreg);

                        // Do operation with mem
                        inst_FS_TT(ins_RN, dst);

                        // store back
                        inst_FS_TT(INS_fstp, dst);

                        // SetupOp should have marked the regvar as dead in that case
                        assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
                               (genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);

                        FlatFPX87_Kill(&compCurFPState, srcreg);
                    }
                }
            }
    }

    return result;
}

// Dispatch codegen for simple FP operator nodes (GTK_SMPOP).
void CodeGen::genCodeForTreeStackFP_SmpOp(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_SmpOp() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    assert(tree->OperKind() & GTK_SMPOP);

    switch (tree->OperGet())
    {
        // Assignment
        case GT_ASG:
        {
            genCodeForTreeStackFP_Asg(tree);
            break;
        }

        // Arithmetic binops
        case GT_ADD:
        case GT_SUB:
        case GT_MUL:
        case GT_DIV:
        {
            genCodeForTreeStackFP_Arithm(tree);
            break;
        }

        // Asg-Arithmetic ops
        case GT_ASG_ADD:
        case GT_ASG_SUB:
        case GT_ASG_MUL:
        case GT_ASG_DIV:
        {
            genCodeForTreeStackFP_AsgArithm(tree);
            break;
        }

        case GT_IND:
        case GT_LEA:
        {
            regMaskTP addrReg;

            // Make sure the address value is 'addressable' */
            addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);

            // Load the value onto the FP stack
            regNumber reg = regSet.PickRegFloat();
            genLoadStackFP(tree, reg);

            genDoneAddressable(tree, addrReg, RegSet::FREE_REG);

            genCodeForTreeStackFP_DONE(tree, reg);

            break;
        }

        case GT_RETURN:
        {
            GenTreePtr op1 = tree->gtOp.gtOp1;
            assert(op1);

            // Compute the result onto the FP stack
            if (op1->gtType == TYP_FLOAT)
            {
#if ROUND_FLOAT
                bool roundOp1 = false;

                switch (getRoundFloatLevel())
                {
                    case ROUND_NEVER:
                        /* No rounding at all */
                        break;

                    case ROUND_CMP_CONST:
                        break;

                    case ROUND_CMP:
                        /* Round all comparands and return values*/
                        roundOp1 = true;
                        break;

                    case ROUND_ALWAYS:
                        /* Round everything */
                        roundOp1 = true;
                        break;

                    default:
                        assert(!"Unsupported Round Level");
                        break;
                }
#endif
                genCodeForTreeFlt(op1);
            }
            else
            {
                assert(op1->gtType == TYP_DOUBLE);
                genCodeForTreeFloat(op1);

#if ROUND_FLOAT
                if ((op1->gtOper == GT_CAST) && (op1->CastFromType() == TYP_LONG))
                    genRoundFpExpressionStackFP(op1);
#endif
            }

            // kill enregistered variables
            compCurFPState.Pop();
            assert(compCurFPState.m_uStackSize == 0);
            break;
        }

        case GT_COMMA:
        {
            GenTreePtr op1 = tree->gtOp.gtOp1;
            GenTreePtr op2 = tree->gtGetOp2();

            if (tree->gtFlags & GTF_REVERSE_OPS)
            {
                genCodeForTreeFloat(op2);

                regSet.SetUsedRegFloat(op2, true);

                genEvalSideEffects(op1);

                if (op2->gtFlags & GTF_SPILLED)
                {
                    UnspillFloat(op2);
                }

                regSet.SetUsedRegFloat(op2, false);
            }
            else
            {
                genEvalSideEffects(op1);
                genCodeForTreeFloat(op2);
            }

            genCodeForTreeStackFP_DONE(tree, op2->gtRegNum);
            break;
        }
        case GT_CAST:
        {
            genCodeForTreeStackFP_Cast(tree);
            break;
        }

        case GT_NEG:
        {
            GenTreePtr op1 = tree->gtOp.gtOp1;

            // get the tree into a register
            genCodeForTreeFloat(op1);

            // Take reg to top of stack
            FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);

            // change the sign
            instGen(INS_fchs);

            // mark register that holds tree
            genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
            return;
        }
        case GT_INTRINSIC:
        {
            assert(Compiler::IsMathIntrinsic(tree));

            GenTreePtr op1 = tree->gtOp.gtOp1;

            // get tree into a register
            genCodeForTreeFloat(op1);

            // Take reg to top of stack
            FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);

            // Table indexed by CORINFO_INTRINSIC_* id (order verified below)
            static const instruction mathIns[] = {
                INS_fsin, INS_fcos, INS_fsqrt, INS_fabs, INS_frndint,
            };

            assert(mathIns[CORINFO_INTRINSIC_Sin] == INS_fsin);
            assert(mathIns[CORINFO_INTRINSIC_Cos] == INS_fcos);
            assert(mathIns[CORINFO_INTRINSIC_Sqrt] == INS_fsqrt);
            assert(mathIns[CORINFO_INTRINSIC_Abs] == INS_fabs);
            assert(mathIns[CORINFO_INTRINSIC_Round] == INS_frndint);
            assert((unsigned)(tree->gtIntrinsic.gtIntrinsicId) < sizeof(mathIns) / sizeof(mathIns[0]));
            instGen(mathIns[tree->gtIntrinsic.gtIntrinsicId]);

            // mark register that holds tree
            genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);

            return;
        }
        case GT_CKFINITE:
        {
            TempDsc* temp;
            int      offs;

            GenTreePtr op1 = tree->gtOp.gtOp1;

            // Offset of the DWord containing the exponent
            offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);

            // get tree into a register
            genCodeForTreeFloat(op1);

            // Take reg to top of stack
            FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);

            temp          = compiler->tmpGetTemp(op1->TypeGet());
            emitAttr size = EmitSize(op1);

            // Store the value from the FP stack into the temp
            getEmitter()->emitIns_S(INS_fst, size, temp->tdTempNum(), 0);

            regNumber reg = regSet.rsPickReg();

            // Load the DWord containing the exponent into a general reg.
            inst_RV_ST(INS_mov, reg, temp, offs, op1->TypeGet(), EA_4BYTE);
            compiler->tmpRlsTemp(temp);

            // 'reg' now contains the DWord containing the exponent
            regTracker.rsTrackRegTrash(reg);

            // Mask of exponent with all 1's - appropriate for given type

            int expMask;
            expMask = (op1->gtType == TYP_FLOAT) ? 0x7F800000  // TYP_FLOAT
                                                 : 0x7FF00000; // TYP_DOUBLE

            // Check if the exponent is all 1's

            inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
            inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);

            // If exponent was all 1's, we need to throw ArithExcep
            genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);

            genUpdateLife(tree);

            genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
            break;
        }
        default:
            NYI("opertype");
    }
}

// Generate code for a GT_CAST with a floating-point source or destination,
// using fild for integer sources and round-through-memory for FP narrowing.
void CodeGen::genCodeForTreeStackFP_Cast(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Cast() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

#if ROUND_FLOAT
    bool roundResult = true;
#endif

    regMaskTP addrReg;
    TempDsc*  temp;
    emitAttr  size;

    GenTreePtr op1 = tree->gtOp.gtOp1;

    // If op1 is a comma expression, evaluate the non-last parts, make op1 be the rest.
    op1 = genCodeForCommaTree(op1);

    switch (op1->gtType)
    {
        case TYP_BOOL:
        case TYP_BYTE:
        case TYP_UBYTE:
        case TYP_CHAR:
        case TYP_SHORT:
        {

            // Operand too small for 'fild', load it into a register
            genCodeForTree(op1, 0);

#if ROUND_FLOAT
            // no need to round, can't overflow float or dbl
            roundResult = false;
#endif

            // fall through
        }
        case TYP_INT:
        case TYP_BYREF:
        case TYP_LONG:
        {
            // Can't 'fild' a constant, it has to be loaded from memory
            switch (op1->gtOper)
            {
                case GT_CNS_INT:
                    op1 = genMakeConst(&op1->gtIntCon.gtIconVal, TYP_INT, tree, false);
                    break;

                case GT_CNS_LNG:
                    // Our encoder requires fild on m64int to be 64-bit aligned.
                    op1 = genMakeConst(&op1->gtLngCon.gtLconVal, TYP_LONG, tree, true);
                    break;
                default:
                    break;
            }

            addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);

            // Grab register for the cast
            regNumber reg = regSet.PickRegFloat();
            genMarkTreeInReg(tree, reg);
            compCurFPState.Push(reg);

            // Is the value now sitting in a register?
            if (op1->InReg())
            {
                // We'll have to store the value into the stack */
                size = EA_ATTR(roundUp(genTypeSize(op1->gtType)));
                temp = compiler->tmpGetTemp(op1->TypeGet());

                // Move the value into the temp
                if (op1->gtType == TYP_LONG)
                {
                    regPairNo regPair = op1->gtRegPair;

                    // This code is pretty ugly, but straightforward

                    if (genRegPairLo(regPair) == REG_STK)
                    {
                        regNumber rg1 = genRegPairHi(regPair);

                        assert(rg1 != REG_STK);

                        /* Move enregistered half to temp */

                        inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);

                        /* Move lower half to temp via "high register" */

                        inst_RV_TT(INS_mov, rg1, op1, 0);
                        inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);

                        /* Reload transfer register */

                        inst_RV_ST(INS_mov, rg1, temp, 4, TYP_LONG);
                    }
                    else if (genRegPairHi(regPair) == REG_STK)
                    {
                        regNumber rg1 = genRegPairLo(regPair);

                        assert(rg1 != REG_STK);

                        /* Move enregistered half to temp */

                        inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);

                        /* Move high half to temp via "low register" */

                        inst_RV_TT(INS_mov, rg1, op1, 4);
                        inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);

                        /* Reload transfer register */

                        inst_RV_ST(INS_mov, rg1, temp, 0, TYP_LONG);
                    }
                    else
                    {
                        /* Move the value into the temp */

                        inst_ST_RV(INS_mov, temp, 0, genRegPairLo(regPair), TYP_LONG);
                        inst_ST_RV(INS_mov, temp, 4, genRegPairHi(regPair), TYP_LONG);
                    }
                    genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

                    /* Load the long from the temp */

                    inst_FS_ST(INS_fildl, size, temp, 0);
                }
                else
                {
                    /* Move the value into the temp */

                    inst_ST_RV(INS_mov, temp, 0, op1->gtRegNum, TYP_INT);

                    genDoneAddressable(op1, addrReg, RegSet::FREE_REG);

                    /* Load the integer from the temp */

                    inst_FS_ST(INS_fild, size, temp, 0);
                }

                // We no longer need the temp
                compiler->tmpRlsTemp(temp);
            }
            else
            {
                // Load the value from its address
                if (op1->gtType == TYP_LONG)
                    inst_TT(INS_fildl, op1);
                else
                    inst_TT(INS_fild, op1);

                genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
            }

#if ROUND_FLOAT
            /* integer to fp conversions can overflow. roundResult
             * is cleared above in cases where it can't
             */
            if (roundResult &&
                ((tree->gtType == TYP_FLOAT) || ((tree->gtType == TYP_DOUBLE) && (op1->gtType == TYP_LONG))))
                genRoundFpExpression(tree);
#endif

            break;
        }
        case TYP_FLOAT:
        {
            // This is a cast from float to double.
            // Note that conv.r(r4/r8) and conv.r8(r4/r8) are indistinguishable
            // as we will generate GT_CAST-TYP_DOUBLE for both. This would
            // cause us to truncate precision in either case. However,
            // conv.r was needless in the first place, and should have
            // been removed */
            genCodeForTreeFloat(op1); // Truncate its precision

            if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
                op1->gtOper == GT_IND || op1->gtOper == GT_LEA)
            {
                // We take advantage here of the fact that we know that our
                // codegen will have just loaded this from memory, and that
                // therefore, no cast is really needed.
                // Ideally we wouldn't do this optimization here, but in
                // morphing, however, we need to do this after regalloc, as
                // this optimization doesn't apply if what we're loading is a
                // regvar
            }
            else
            {
                genRoundFpExpressionStackFP(op1, tree->TypeGet());
            }

            // Assign reg to tree
            genMarkTreeInReg(tree, op1->gtRegNum);

            break;
        }
        case TYP_DOUBLE:
        {
            // This is a cast from double to float or double
            // Load the value, store as destType, load back
            genCodeForTreeFlt(op1);

            if ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
                 op1->gtOper == GT_IND || op1->gtOper == GT_LEA) &&
                tree->TypeGet() == TYP_DOUBLE)
            {
                // We take advantage here of the fact that we know that our
                // codegen will have just loaded this from memory, and that
                // therefore, no cast is really needed.
                // Ideally we wouldn't do this optimization here, but in
                // morphing. However, we need to do this after regalloc, as
                // this optimization doesn't apply if what we're loading is a
                // regvar
            }
            else
            {
                genRoundFpExpressionStackFP(op1, tree->TypeGet());
            }

            // Assign reg to tree
            genMarkTreeInReg(tree, op1->gtRegNum);

            break;
        }
        default:
        {
            assert(!"unsupported cast");
            break;
        }
    }
}

// Generate code for FP nodes that are neither constants, leaves, nor simple
// ops — currently only GT_CALL.
void CodeGen::genCodeForTreeStackFP_Special(GenTreePtr tree)
{
#ifdef DEBUG
    if (compiler->verbose)
    {
        printf("genCodeForTreeStackFP_Special() ");
        Compiler::printTreeID(tree);
        printf("\n");
    }
#endif // DEBUG

    switch (tree->OperGet())
    {
        case GT_CALL:
        {
            genCodeForCall(tree, true);
            break;
        }
        default:
            NYI("genCodeForTreeStackFP_Special");
            break;
    }
}

// Top-level dispatcher for generating code for a floating-point tree.
// 'pref' is unused here — assumed kept for interface parity with the
// non-x87 path (TODO confirm).
void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
{
    // TestTransitions();
    genTreeOps oper;
    unsigned   kind;

    assert(tree);
    assert(tree->gtOper != GT_STMT);
    assert(varTypeIsFloating(tree));

    // What kind of node do we have?
    oper = tree->OperGet();
    kind = tree->OperKind();

    if (kind & GTK_CONST)
    {
        genCodeForTreeStackFP_Const(tree);
    }
    else if (kind & GTK_LEAF)
    {
        genCodeForTreeStackFP_Leaf(tree);
    }
    else if (kind & GTK_SMPOP)
    {
        genCodeForTreeStackFP_SmpOp(tree);
    }
    else
    {
        genCodeForTreeStackFP_Special(tree);
    }

#ifdef DEBUG
    if (verbose)
    {
        JitDumpFPState();
    }
    assert(compCurFPState.IsConsistent());
#endif
}

// Emit an x87 comparison between 'tos' (moved to top of stack) and 'other'
// (register or memory), leaving the result in EFLAGS (via fcomi* or
// fnstsw/sahf). Returns true if the emitted comparison's operand order is
// reversed relative to the caller's, so the caller must swap its relop.
bool CodeGen::genCompInsStackFP(GenTreePtr tos, GenTreePtr other)
{
    // assume gensetupop done

    bool bUseFcomip = genUse_fcomip();
    bool bReverse   = false;

    // Take op1 to top of the stack
    FlatFPX87_MoveToTOS(&compCurFPState, tos->gtRegNum);

    // We pop top of stack if it's not a live regvar
    bool bPopTos   = !(tos->IsRegVar() && !tos->IsRegVarDeath()) || (tos->InReg());
    bool bPopOther = !(other->IsRegVar() && !other->IsRegVarDeath()) || (other->InReg());

    assert(tos->IsRegVar() || (tos->InReg()));

    if (!(other->IsRegVar() || (other->InReg())))
    {
        // op2 in memory
        assert(bPopOther);

        if (bUseFcomip)
        {
            // We should have space for a load
            assert(compCurFPState.m_uStackSize < FP_PHYSICREGISTERS);

            // load from mem, now the comparison will be the other way around
            inst_FS_TT(INS_fld, other);
            inst_FN(INS_fcomip, 1);

            // pop if we've been asked to do so
            if (bPopTos)
            {
                inst_FS(INS_fstp, 0);
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }

            bReverse = true;
        }
        else
        {
            // compare directly with memory
            if (bPopTos)
            {
                inst_FS_TT(INS_fcomp, other);
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }
            else
            {
                inst_FS_TT(INS_fcom, other);
            }
        }
    }
    else
    {
        if (bUseFcomip)
        {
            if (bPopTos)
            {
                inst_FN(INS_fcomip, compCurFPState.VirtualToST(other->gtRegNum));
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }
            else
            {
                inst_FN(INS_fcomi, compCurFPState.VirtualToST(other->gtRegNum));
            }

            if (bPopOther)
            {
                FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
            }
        }
        else
        {
            if (bPopTos)
            {
                inst_FN(INS_fcomp, compCurFPState.VirtualToST(other->gtRegNum));
                FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
            }
            else
            {
                inst_FN(INS_fcom, compCurFPState.VirtualToST(other->gtRegNum));
            }

            if (bPopOther)
            {
                FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
            }
        }
    }

    if (!bUseFcomip)
    {
        // oops, we have to put result of compare in eflags

        // Grab EAX for the result of the fnstsw
        regSet.rsGrabReg(RBM_EAX);

        // Generate the 'fnstsw' and test its result
        inst_RV(INS_fnstsw, REG_EAX, TYP_INT);
        regTracker.rsTrackRegTrash(REG_EAX);
        instGen(INS_sahf);
    }

    return bReverse;
}

// Generate a conditional branch for an FP relop: a NaN check (jpe) followed
// by the unsigned-condition jump matching the (possibly swapped) relop.
void CodeGen::genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition)
{
    assert(jumpTrue && jumpFalse);
    assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
    assert(varTypeIsFloating(cond->gtOp.gtOp1));

    GenTreePtr op1 = cond->gtOp.gtOp1;
    GenTreePtr op2 = cond->gtOp.gtOp2;
    genTreeOps cmp = cond->OperGet();

    // Prepare operands.
    genSetupForOpStackFP(op1, op2, false, false, true, false);

    GenTreePtr tos;
    GenTreePtr other;
    bool       bReverseCmp = false;

    if ((op2->IsRegVar() || (op2->InReg())) &&                     // op2 is in a reg
        (compCurFPState.TopVirtual() == (unsigned)op2->gtRegNum && // Is it already at the top of the stack?
         (!op2->IsRegVar() || op2->IsRegVarDeath())))              // are we going to pop it off?
    {
        tos         = op2;
        other       = op1;
        bReverseCmp = true;
    }
    else
    {
        tos         = op1;
        other       = op2;
        bReverseCmp = false;
    }

    if (genCompInsStackFP(tos, other))
    {
        // comparison instruction was emitted with reversed operands; flip once more
        bReverseCmp = !bReverseCmp;
    }

    // do .un comparison
    if (cond->gtFlags & GTF_RELOP_NAN_UN)
    {
        // Generate the first jump (NaN check)
        genCondJmpInsStackFP(EJ_jpe, jumpTrue, NULL, bDoTransition);
    }
    else
    {
        jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;

        // Generate the first jump (NaN check)
        genCondJmpInsStackFP(EJ_jpe, jumpFalse, NULL, bDoTransition);
    }

    /* Generate the second jump (comparison) */
    // Unsigned-flavor jumps: x87 compare results come back via C0/C2/C3
    // (unsigned-style flags), indexed by relop.
    const static BYTE dblCmpTstJmp2[] = {
        EJ_je,  // GT_EQ
        EJ_jne, // GT_NE
        EJ_jb,  // GT_LT
        EJ_jbe, // GT_LE
        EJ_jae, // GT_GE
        EJ_ja,  // GT_GT
    };

    // Swap comp order if necessary
    if (bReverseCmp)
    {
        cmp = GenTree::SwapRelop(cmp);
    }

    genCondJmpInsStackFP((emitJumpKind)dblCmpTstJmp2[cmp - GT_EQ], jumpTrue, jumpFalse, bDoTransition);
}

// Return a block to jump to so that the x87 stack state 'pState' matches the
// target's recorded state; creates an intermediate transition block if needed.
BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget)
{
    // Fast paths where a transition block is not necessary
    // NOTE(review): parses as (bbFPStateX87 && AreEqual(...)) || IsEmpty() since
    // '&&' binds tighter than '||' — appears intended, but explicit parentheses
    // would make it unambiguous.
    if (pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87) || pState->IsEmpty())
    {
        return pTarget;
    }

    // We shouldn't have any handlers if we're generating transition blocks, as we don't know
    // how to recover them
    assert(compiler->compMayHaveTransitionBlocks);
    assert(compiler->compHndBBtabCount == 0);

#ifdef DEBUG
    compiler->fgSafeBasicBlockCreation = true;
#endif

    // Create a temp block
    BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);

#ifdef DEBUG
    compiler->fgSafeBasicBlockCreation = false;
#endif

    VarSetOps::Assign(compiler, pBlock->bbLiveIn, pFrom->bbLiveOut);
    VarSetOps::Assign(compiler, pBlock->bbLiveOut, pFrom->bbLiveOut);

    pBlock->bbJumpDest = pTarget;
    pBlock->bbFlags |= BBF_JMP_TARGET;
    //
    // If either pFrom or pTarget are cold blocks then
    // the transition
block also must be cold + // + pBlock->bbFlags |= (pFrom->bbFlags & BBF_COLD); + pBlock->bbFlags |= (pTarget->bbFlags & BBF_COLD); + + // The FP state for the block is the same as the current one + pBlock->bbFPStateX87 = FlatFPAllocFPState(pState); + + if ((pBlock->bbFlags & BBF_COLD) || (compiler->fgFirstColdBlock == NULL)) + { + // + // If this block is cold or if all blocks are hot + // then we just insert it at the end of the method. + // + compiler->fgMoveBlocksAfter(pBlock, pBlock, compiler->fgLastBBInMainFunction()); + } + else + { + // + // This block is hot so we need to insert it in the hot region + // of the method. + // + BasicBlock* lastHotBlock = compiler->fgFirstColdBlock->bbPrev; + noway_assert(lastHotBlock != nullptr); + + if (lastHotBlock->bbFallsThrough()) + NO_WAY("Bad fgFirstColdBlock in genTransitionBlockStackFP()"); + + // + // Insert pBlock between lastHotBlock and fgFirstColdBlock + // + compiler->fgInsertBBafter(lastHotBlock, pBlock); + } + + return pBlock; +} + +void CodeGen::genCondJumpLngStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse) +{ + // For the moment, and so we don't have to deal with the amount of special cases + // we have, will insert a dummy block for jumpTrue (if necessary) that will do the + // transition for us. For the jumpFalse case, we play a trick. For the false case , + // a Long conditional has a fallthrough (least significant DWORD check is false) and + // also has a jump to the fallthrough (bbNext) if the most significant DWORD check + // fails. However, we do want to make an FP transition if we're in the later case, + // So what we do is create a label and make jumpFalse go there. This label is defined + // before doing the FP transition logic at the end of the block, so now both exit paths + // for false condition will go through the transition and then fall through to bbnext. 
+ assert(jumpFalse == compiler->compCurBB->bbNext); + + BasicBlock* pTransition = genCreateTempLabel(); + + genCondJumpLng(cond, jumpTrue, pTransition, true); + + genDefineTempLabel(pTransition); +} + +void CodeGen::genQMarkRegVarTransition(GenTreePtr nextNode, VARSET_VALARG_TP liveset) +{ + // Kill any vars that may die in the transition + VARSET_TP VARSET_INIT_NOCOPY(newLiveSet, VarSetOps::Intersection(compiler, liveset, compiler->optAllFPregVars)); + + regMaskTP liveRegIn = genRegMaskFromLivenessStackFP(newLiveSet); + genCodeForTransitionFromMask(&compCurFPState, liveRegIn); + + unsigned i; + + // Kill all regvars + for (i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if ((genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat)) + { + + genRegVarDeathStackFP(regSet.genRegVarsFloat[i]); + } + } + + // Born necessary regvars + for (i = 0; i < compiler->lvaTrackedCount; i++) + { + unsigned lclVar = compiler->lvaTrackedToVarNum[i]; + LclVarDsc* varDsc = compiler->lvaTable + lclVar; + + assert(varDsc->lvTracked); + + if (varDsc->lvRegister && VarSetOps::IsMember(compiler, newLiveSet, i)) + { + genRegVarBirthStackFP(varDsc); + } + } +} + +void CodeGen::genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode) +{ + assert(regSet.rsMaskLockedFloat == 0); + + // Save current state at colon + pState->stackState.Init(&compCurFPState); + + // Kill any vars that may die in the transition to then + genQMarkRegVarTransition(nextNode, varsetCond); +} + +void CodeGen::genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode) +{ + assert(regSet.rsMaskLockedFloat == 0); + + FlatFPStateX87 tempSwap; + + // Save current state. Now tempFPState will store the target state for the else block + tempSwap.Init(&compCurFPState); + + compCurFPState.Init(&pState->stackState); + + pState->stackState.Init(&tempSwap); + + // Did any regvars die in the then block that are live on entry to the else block? 
+    // Walk the tracked variables and resurrect any FP regvar that must be live
+    // here but whose x87 register assignment was clobbered by the 'then' block:
+    // kill whatever currently occupies its register, then re-birth the variable.
+    unsigned i;
+    for (i = 0; i < compiler->lvaTrackedCount; i++)
+    {
+        if (VarSetOps::IsMember(compiler, varsetCond, i) && VarSetOps::IsMember(compiler, compiler->optAllFPregVars, i))
+        {
+            // This variable should be live
+            unsigned   lclnum = compiler->lvaTrackedToVarNum[i];
+            LclVarDsc* varDsc = compiler->lvaTable + lclnum;
+
+            if (regSet.genRegVarsFloat[varDsc->lvRegNum] != varDsc)
+            {
+                JITDUMP("genQMarkAfterThenBlockStackFP(): Fixing up regvar that was modified in then\n");
+                if (regSet.genRegVarsFloat[varDsc->lvRegNum])
+                {
+                    // Some other variable squatted on this register; kill it first.
+                    genRegVarDeathStackFP(regSet.genRegVarsFloat[varDsc->lvRegNum]);
+                }
+
+                genRegVarBirthStackFP(varDsc);
+            }
+        }
+    }
+
+    // Kill any vars that may die in the transition
+    genQMarkRegVarTransition(nextNode, varsetCond);
+}
+
+// Called at the end of a qmark's 'then' block: emits the x87 stack transition
+// back to the state that was saved in pState at the colon, and makes that the
+// current FP state so codegen after the qmark sees a single consistent state.
+void CodeGen::genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState)
+{
+    JITDUMP("genQMarkAfterThenBlockStackFP()\n");
+    assert(regSet.rsMaskLockedFloat == 0);
+
+    // Generate transition to the previous one set by the then block
+    genCodeForTransitionStackFP(&compCurFPState, &pState->stackState);
+
+    // Update state
+    compCurFPState.Init(&pState->stackState);
+}
+
+// Marks (varDsc != NULL) or unmarks (varDsc == NULL) the virtual FP register
+// 'reg' as holding an enregistered variable: updates regSet.rsMaskRegVarFloat
+// and the reg -> LclVarDsc lookup table regSet.genRegVarsFloat.
+void CodeGenInterface::SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc)
+{
+    regMaskTP mask = genRegMaskFloat(reg, type);
+
+    if (varDsc)
+    {
+        JITDUMP("marking register %s as a regvar\n", getRegNameFloat(reg, type));
+
+        // Register must not already be locked, a regvar, or in use.
+        assert(mask && ((regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat | regSet.rsMaskUsedFloat) & mask) == 0);
+
+        regSet.rsMaskRegVarFloat |= mask;
+    }
+    else
+    {
+        JITDUMP("unmarking register %s as a regvar\n", getRegNameFloat(reg, type));
+
+        // Can only unmark a register that is currently a regvar.
+        assert(mask && (regSet.rsMaskRegVarFloat & mask));
+
+        regSet.rsMaskRegVarFloat &= ~mask;
+    }
+
+    // Update lookup table
+    regSet.genRegVarsFloat[reg] = varDsc;
+}
+
+// Generates a conditional jump. It will do the appropriate stack matching for the jmpTrue.
+// We don't use jumpFalse anywhere and the integer codebase assumes that it will be bbnext, and that is +// taken care of at the end of the bb code generation. +void CodeGen::genCondJmpInsStackFP(emitJumpKind jumpKind, + BasicBlock* jumpTrue, + BasicBlock* jumpFalse, + bool bDoTransition) +{ + // Assert the condition above. + assert(!jumpFalse || jumpFalse == compiler->compCurBB->bbNext || !bDoTransition); + + // Do the fp stack matching. + if (bDoTransition && !jumpTrue->bbFPStateX87 && + FlatFPSameRegisters(&compCurFPState, genRegMaskFromLivenessStackFP(jumpTrue->bbLiveIn))) + { + // Target block doesn't have state yet, but has the same registers, so + // we allocate the block and generate the normal jump + genCodeForBBTransitionStackFP(jumpTrue); + inst_JMP(jumpKind, jumpTrue); + } + else if (!bDoTransition || compCurFPState.IsEmpty() || // If it's empty, target has to be empty too. + (jumpTrue->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTrue->bbFPStateX87))) + { + // Nothing to do here. Proceed normally and generate the jump + inst_JMP(jumpKind, jumpTrue); + + if (jumpFalse && jumpFalse != compiler->compCurBB->bbNext) + { + inst_JMP(EJ_jmp, jumpFalse); + } + } + else + { + // temporal workaround for stack matching + // do a forward conditional jump, generate the transition and jump to the target + // The payload is an aditional jump instruction, but both jumps will be correctly + // predicted by the processor in the loop case. + BasicBlock* endLabel = NULL; + + endLabel = genCreateTempLabel(); + + inst_JMP(emitter::emitReverseJumpKind(jumpKind), endLabel); + + genCodeForBBTransitionStackFP(jumpTrue); + + inst_JMP(EJ_jmp, jumpTrue); + + genDefineTempLabel(endLabel); + } +} + +void CodeGen::genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab) +{ + // Only come here when we have to do something special for the FPU stack! 
+ // + assert(!compCurFPState.IsEmpty()); + VARSET_TP VARSET_INIT_NOCOPY(liveInFP, VarSetOps::MakeEmpty(compiler)); + VARSET_TP VARSET_INIT_NOCOPY(liveOutFP, VarSetOps::MakeEmpty(compiler)); + for (unsigned i = 0; i < jumpCnt; i++) + { + VarSetOps::Assign(compiler, liveInFP, jumpTab[i]->bbLiveIn); + VarSetOps::IntersectionD(compiler, liveInFP, compiler->optAllFPregVars); + VarSetOps::Assign(compiler, liveOutFP, compiler->compCurBB->bbLiveOut); + VarSetOps::IntersectionD(compiler, liveOutFP, compiler->optAllFPregVars); + + if (!jumpTab[i]->bbFPStateX87 && VarSetOps::Equal(compiler, liveInFP, liveOutFP)) + { + // Hasn't state yet and regvar set is the same, so just copy state and don't change the jump + jumpTab[i]->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState); + } + else if (jumpTab[i]->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTab[i]->bbFPStateX87)) + { + // Same state, don't change the jump + } + else + { + // We have to do a transition. First check if we can reuse another one + unsigned j; + for (j = 0; j < i; j++) + { + // Has to be already forwarded. 
If not it can't be targeting the same block
+                if (jumpTab[j]->bbFlags & BBF_FORWARD_SWITCH)
+                {
+                    if (jumpTab[i] == jumpTab[j]->bbJumpDest)
+                    {
+                        // yipee, we can reuse this transition block
+                        jumpTab[i] = jumpTab[j];
+                        break;
+                    }
+                }
+            }
+
+            if (j == i)
+            {
+                // We will have to create a new transition block
+                jumpTab[i] = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTab[i]);
+
+                // Tag it so later switch targets can find and reuse it (cleared below).
+                jumpTab[i]->bbFlags |= BBF_FORWARD_SWITCH;
+            }
+        }
+    }
+
+    // Clear flag
+    for (unsigned i = 0; i < jumpCnt; i++)
+    {
+        jumpTab[i]->bbFlags &= ~BBF_FORWARD_SWITCH;
+    }
+
+    // everything's fixed now, so go down the normal path
+    return genTableSwitch(reg, jumpCnt, jumpTab);
+}
+
+// Loads the GT_CNS_DBL constant 'tree' onto the x87 stack.
+// +0.0 and 1.0 are emitted as fldz/fld1 (no memory access); anything else is
+// materialized as a read-only data constant and loaded with fld.
+// If bOnlyNoMemAccess is true, nothing is generated for constants that would
+// require a memory access and false is returned; returns true otherwise.
+bool CodeGen::genConstantLoadStackFP(GenTreePtr tree, bool bOnlyNoMemAccess)
+{
+    assert(tree->gtOper == GT_CNS_DBL);
+
+    bool bFastConstant = false;
+    instruction ins_ConstantNN = INS_fldz; // keep compiler happy
+
+    // Both positive 0 and 1 are representable in float and double, beware if we add other constants
+    // Note: we dispatch on the raw 64-bit pattern of the double, so -0.0
+    // (0x8000000000000000) deliberately does NOT match 'case 0'.
+    switch (*((__int64*)&(tree->gtDblCon.gtDconVal)))
+    {
+        case 0:
+            // CAREFUL here!, -0 is different than +0, a -0 shouldn't issue a fldz.
+            ins_ConstantNN = INS_fldz;
+            bFastConstant = true;
+            break;
+        case I64(0x3ff0000000000000):
+            // 1.0 in IEEE-754 double bits.
+            ins_ConstantNN = INS_fld1;
+            bFastConstant = true;
+    }
+
+    if (bFastConstant == false && bOnlyNoMemAccess)
+    {
+        // Caller asked only to generate instructions if it didn't involve memory accesses
+        return false;
+    }
+
+    if (bFastConstant)
+    {
+        // fldz/fld1 push one value; there must be room on the physical stack.
+        assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
+        instGen(ins_ConstantNN);
+    }
+    else
+    {
+        GenTreePtr addr;
+        if (tree->gtType == TYP_FLOAT || StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal))
+        {
+            // Value is exactly representable as a float; emit the smaller constant.
+            float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+            addr = genMakeConst(&f, TYP_FLOAT, tree, false);
+        }
+        else
+        {
+            addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+        }
+
+        inst_FS_TT(INS_fld, addr);
+    }
+
+    return true;
+}
+
+// Function called at the end of every statement.
For stack based x87 its mission is to +// remove any remaining temps on the stack. +void CodeGen::genEndOfStatement() +{ + unsigned i; + +#ifdef DEBUG + // Sanity check + unsigned uTemps = 0; + for (i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if (compCurFPState.Mapped(i) && // register is mapped + (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered + { + uTemps++; + } + } + assert(uTemps <= 1); +#endif + + for (i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if (compCurFPState.Mapped(i) && // register is mapped + (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered + { + // remove register from stacks + FlatFPX87_Unload(&compCurFPState, i); + } + } + + assert(ConsistentAfterStatementStackFP()); +} + +bool CodeGen::StackFPIsSameAsFloat(double d) +{ + if (forceCastToFloat(d) == d) + { + JITDUMP("StackFPIsSameAsFloat is true for value %lf\n", d); + return true; + } + else + { + JITDUMP("StackFPIsSameAsFloat is false for value %lf\n", d); + } + + return false; +} + +GenTreePtr CodeGen::genMakeAddressableStackFP(GenTreePtr tree, + regMaskTP* regMaskIntPtr, + regMaskTP* regMaskFltPtr, + bool bCollapseConstantDoubles) +{ + *regMaskIntPtr = *regMaskFltPtr = 0; + + switch (tree->OperGet()) + { + case GT_CNS_DBL: + if (tree->gtDblCon.gtDconVal == 0.0 || tree->gtDblCon.gtDconVal == 1.0) + { + // For constants like 0 or 1 don't waste memory + genCodeForTree(tree, 0); + regSet.SetUsedRegFloat(tree, true); + + *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum); + return tree; + } + else + { + GenTreePtr addr; + if (tree->gtType == TYP_FLOAT || + (bCollapseConstantDoubles && StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal))) + { + float f = forceCastToFloat(tree->gtDblCon.gtDconVal); + addr = genMakeConst(&f, TYP_FLOAT, tree, true); + } + else + { + addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true); + } +#ifdef DEBUG + if (compiler->verbose) + { + printf("Generated new constant in tree "); + 
Compiler::printTreeID(addr); + printf(" with value %lf\n", tree->gtDblCon.gtDconVal); + } +#endif // DEBUG + tree->CopyFrom(addr, compiler); + return tree; + } + break; + case GT_REG_VAR: + // We take care about this in genKeepAddressableStackFP + return tree; + case GT_LCL_VAR: + case GT_LCL_FLD: + case GT_CLS_VAR: + return tree; + + case GT_LEA: + if (!genMakeIndAddrMode(tree, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false)) + { + assert(false); + } + genUpdateLife(tree); + return tree; + + case GT_IND: + // Try to make the address directly addressable + + if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false)) + { + genUpdateLife(tree); + return tree; + } + else + { + GenTreePtr addr = tree; + tree = tree->gtOp.gtOp1; + + genCodeForTree(tree, 0); + regSet.rsMarkRegUsed(tree, addr); + + *regMaskIntPtr = genRegMask(tree->gtRegNum); + return addr; + } + + // fall through + + default: + genCodeForTreeFloat(tree); + regSet.SetUsedRegFloat(tree, true); + + // update mask + *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum); + + return tree; + break; + } +} + +void CodeGen::genKeepAddressableStackFP(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr) +{ + regMaskTP regMaskInt, regMaskFlt; + + regMaskInt = *regMaskIntPtr; + regMaskFlt = *regMaskFltPtr; + + *regMaskIntPtr = *regMaskFltPtr = 0; + + switch (tree->OperGet()) + { + case GT_REG_VAR: + // If register has been spilled, unspill it + if (tree->gtFlags & GTF_SPILLED) + { + UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]); + } + + // If regvar is dying, take it out of the regvar mask + if (tree->IsRegVarDeath()) + { + genRegVarDeathStackFP(tree); + } + genUpdateLife(tree); + + return; + case GT_CNS_DBL: + { + if (tree->gtFlags & GTF_SPILLED) + { + UnspillFloat(tree); + } + + *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum); + + return; + } + case GT_LCL_FLD: + case GT_LCL_VAR: + case GT_CLS_VAR: + genUpdateLife(tree); + return; + case 
GT_IND: + case GT_LEA: + if (regMaskFlt) + { + // fall through + } + else + { + *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0); + *regMaskFltPtr = 0; + return; + } + default: + + *regMaskIntPtr = 0; + if (tree->gtFlags & GTF_SPILLED) + { + UnspillFloat(tree); + } + *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum); + return; + } +} + +void CodeGen::genDoneAddressableStackFP(GenTreePtr tree, + regMaskTP addrRegInt, + regMaskTP addrRegFlt, + RegSet::KeepReg keptReg) +{ + assert(!(addrRegInt && addrRegFlt)); + + if (addrRegInt) + { + return genDoneAddressable(tree, addrRegInt, keptReg); + } + else if (addrRegFlt) + { + if (keptReg == RegSet::KEEP_REG) + { + for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if (genRegMaskFloat((regNumber)i) & addrRegFlt) + { + regSet.SetUsedRegFloat(tree, false); + } + } + } + } +} + +void CodeGen::FlatFPX87_Kill(FlatFPStateX87* pState, unsigned uVirtual) +{ + JITDUMP("Killing %s\n", regVarNameStackFP((regNumber)uVirtual)); + + assert(pState->TopVirtual() == uVirtual); + pState->Pop(); +} + +void CodeGen::FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned uRegister, bool bEmitCode) +{ + JITDUMP("Pushing %s to stack\n", regVarNameStackFP((regNumber)uRegister)); + + pState->Push(uRegister); +} + +unsigned CodeGen::FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode) +{ + assert(pState->m_uStackSize > 0); + + // Update state + unsigned uVirtual = pState->Pop(); + + // Emit instruction + if (bEmitCode) + { + inst_FS(INS_fstp, 0); + } + + return (uVirtual); +} + +unsigned CodeGen::FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode) +{ + return pState->TopVirtual(); +} + +void CodeGen::FlatFPX87_Unload(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode) +{ + if (uVirtual != pState->TopVirtual()) + { + // We will do an fstp to the right place + + // Update state + unsigned uStack = pState->m_uVirtualMap[uVirtual]; + unsigned uPhysic = pState->StackToST(uStack); + + pState->Unmap(uVirtual); + 
pState->Associate(pState->TopVirtual(), uStack);
+        pState->m_uStackSize--;
+
+#ifdef DEBUG
+
+        // Poison the vacated slot so stale reads trip consistency checks.
+        pState->m_uStack[pState->m_uStackSize] = (unsigned)-1;
+#endif
+
+        // Emit instruction
+        if (bEmitCode)
+        {
+            inst_FS(INS_fstp, uPhysic);
+        }
+    }
+    else
+    {
+        // Emit fstp
+        FlatFPX87_Pop(pState, bEmitCode);
+    }
+
+    assert(pState->IsConsistent());
+}
+
+// Brings virtual register uVirtual to ST(0), swapping it with the current top
+// of stack if it is not already there. Updates pState; emits fxch only when
+// bEmitCode is true.
+void CodeGenInterface::FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
+{
+    assert(!IsUninitialized(uVirtual));
+
+    JITDUMP("Moving %s to top of stack\n", regVarNameStackFP((regNumber)uVirtual));
+
+    if (uVirtual != pState->TopVirtual())
+    {
+        FlatFPX87_SwapStack(pState, pState->m_uVirtualMap[uVirtual], pState->TopIndex(), bEmitCode);
+    }
+    else
+    {
+        JITDUMP("%s already on the top of stack\n", regVarNameStackFP((regNumber)uVirtual));
+    }
+
+    assert(pState->IsConsistent());
+}
+
+// Exchanges stack entries i and j (flat-stack indices, not ST numbers) in
+// pState, optionally emitting the fxch instruction(s) to do it on the real
+// x87 stack. fxch can only exchange ST(0) with ST(n), so swapping two
+// non-top entries takes three fxch instructions (see below).
+void CodeGenInterface::FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode)
+{
+    assert(i != j);
+    assert(i < pState->m_uStackSize);
+    assert(j < pState->m_uStackSize);
+
+    JITDUMP("Exchanging ST(%i) and ST(%i)\n", pState->StackToST(i), pState->StackToST(j));
+
+    // issue actual swaps
+    int iPhysic = pState->StackToST(i);
+    int jPhysic = pState->StackToST(j);
+
+    if (bEmitCode)
+    {
+        if (iPhysic == 0 || jPhysic == 0)
+        {
+            // One of them is already ST(0): a single fxch with the other does it.
+            inst_FN(INS_fxch, iPhysic ? 
iPhysic : jPhysic); + } + else + { + inst_FN(INS_fxch, iPhysic); + inst_FN(INS_fxch, jPhysic); + inst_FN(INS_fxch, iPhysic); + } + } + + // Update State + + // Swap Register file + pState->m_uVirtualMap[pState->m_uStack[i]] = j; + pState->m_uVirtualMap[pState->m_uStack[j]] = i; + + // Swap stack + int temp; + temp = pState->m_uStack[i]; + pState->m_uStack[i] = pState->m_uStack[j]; + pState->m_uStack[j] = temp; + + assert(pState->IsConsistent()); +} + +#ifdef DEBUG + +void CodeGen::JitDumpFPState() +{ + int i; + + if ((regSet.rsMaskUsedFloat != 0) || (regSet.rsMaskRegVarFloat != 0)) + { + printf("FPSTATE\n"); + printf("Used virtual registers: "); + for (i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if (genRegMaskFloat((regNumber)i) & regSet.rsMaskUsedFloat) + { + printf("FPV%i ", i); + } + } + printf("\n"); + + printf("virtual registers holding reg vars: "); + for (i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) + { + printf("FPV%i ", i); + } + } + printf("\n"); + } + compCurFPState.Dump(); +} +#endif + +// +// +// Register allocation +// +struct ChangeToRegVarCallback +{ + unsigned lclnum; + regNumber reg; +}; + +void Compiler::raInitStackFP() +{ + // Reset local/reg interference + for (int i = 0; i < REG_FPCOUNT; i++) + { + VarSetOps::AssignNoCopy(this, raLclRegIntfFloat[i], VarSetOps::MakeEmpty(this)); + } + + VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::MakeEmpty(this)); + VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this)); + VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this)); + + raCntStkStackFP = 0; + raCntWtdStkDblStackFP = 0; + raCntStkParamDblStackFP = 0; + + VarSetOps::AssignNoCopy(this, raMaskDontEnregFloat, VarSetOps::MakeEmpty(this)); + + // Calculate the set of all tracked FP/non-FP variables + // into compiler->optAllFloatVars and compiler->optAllNonFPvars + unsigned lclNum; + LclVarDsc* varDsc; + + for (lclNum = 0, varDsc = lvaTable; 
lclNum < lvaCount; lclNum++, varDsc++) + { + /* Ignore the variable if it's not tracked */ + + if (!varDsc->lvTracked) + continue; + + /* Get hold of the index and the interference mask for the variable */ + + unsigned varNum = varDsc->lvVarIndex; + + /* add to the set of all tracked FP/non-FP variables */ + + if (varDsc->IsFloatRegType()) + VarSetOps::AddElemD(this, optAllFloatVars, varNum); + else + VarSetOps::AddElemD(this, optAllNonFPvars, varNum); + } +} + +#ifdef DEBUG +void Compiler::raDumpVariableRegIntfFloat() +{ + unsigned i; + unsigned j; + + for (i = REG_FPV0; i < REG_FPCOUNT; i++) + { + if (!VarSetOps::IsEmpty(this, raLclRegIntfFloat[i])) + { + JITDUMP("FPV%u interferes with ", i); + for (j = 0; j < lvaTrackedCount; j++) + { + assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[i], optAllFloatVars))); + + if (VarSetOps::IsMember(this, raLclRegIntfFloat[i], j)) + { + JITDUMP("T%02u/V%02u, ", j, lvaTrackedToVarNum[j]); + } + } + JITDUMP("\n"); + } + } +} +#endif + +// Returns the regnum for the variable passed as param takin in account +// the fpvar to register interference mask. 
If we can't find anything, we
+// will return REG_FPNONE
+regNumber Compiler::raRegForVarStackFP(unsigned varTrackedIndex)
+{
+    // Pick the first virtual FP register whose interference set does not
+    // contain this variable.
+    for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+    {
+        if (!VarSetOps::IsMember(this, raLclRegIntfFloat[i], varTrackedIndex))
+        {
+            return (regNumber)i;
+        }
+    }
+
+    return REG_FPNONE;
+}
+
+// Adds 'weight' to raPayloadStackFP for every tracked FP variable in maskArg.
+// The payload accumulates spill/reload cost charged against a variable's
+// enregistration benefit (non-FP members of maskArg are ignored).
+void Compiler::raAddPayloadStackFP(VARSET_VALARG_TP maskArg, unsigned weight)
+{
+    VARSET_TP VARSET_INIT_NOCOPY(mask, VarSetOps::Intersection(this, maskArg, optAllFloatVars));
+    if (VarSetOps::IsEmpty(this, mask))
+    {
+        return;
+    }
+
+    for (unsigned i = 0; i < lvaTrackedCount; i++)
+    {
+        if (VarSetOps::IsMember(this, mask, i))
+        {
+            raPayloadStackFP[i] += weight;
+        }
+    }
+}
+
+// Returns true when lv1 is the more profitable enregistration candidate than
+// lv2: compares ref counts (weighted unless optimizing for size) minus the
+// accumulated payload and overflow-height costs; ties are broken by raw
+// ref count.
+bool Compiler::raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2)
+{
+    assert(lv1->lvTracked);
+    assert(lv2->lvTracked);
+
+    bool bSmall = (compCodeOpt() == SMALL_CODE);
+
+    double weight1 = double(bSmall ? lv1->lvRefCnt : lv1->lvRefCntWtd) - double(raPayloadStackFP[lv1->lvVarIndex]) -
+                     double(raHeightsStackFP[lv1->lvVarIndex][FP_VIRTUALREGISTERS]);
+
+    double weight2 = double(bSmall ? lv2->lvRefCnt : lv2->lvRefCntWtd) - double(raPayloadStackFP[lv2->lvVarIndex]) -
+                     double(raHeightsStackFP[lv2->lvVarIndex][FP_VIRTUALREGISTERS]);
+
+    double diff = weight1 - weight2;
+
+    if (diff)
+    {
+        return diff > 0 ? true : false;
+    }
+    else
+    {
+        return int(lv1->lvRefCnt - lv2->lvRefCnt) ? 
true : false; + } +} + +#ifdef DEBUG +// Dumps only interesting vars (the ones that are not enregistered yet +void Compiler::raDumpHeightsStackFP() +{ + unsigned i; + unsigned j; + + JITDUMP("raDumpHeightsStackFP():\n"); + JITDUMP("--------------------------------------------------------\n"); + JITDUMP("Weighted Height Table Dump\n "); + for (i = 0; i < FP_VIRTUALREGISTERS; i++) + { + JITDUMP(" %i ", i + 1); + } + + JITDUMP("OVF\n"); + + for (i = 0; i < lvaTrackedCount; i++) + { + if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i)) + { + JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i); + + for (j = 0; j <= FP_VIRTUALREGISTERS; j++) + { + JITDUMP("%5u ", raHeightsStackFP[i][j]); + } + JITDUMP("\n"); + } + } + + JITDUMP("\nNonweighted Height Table Dump\n "); + for (i = 0; i < FP_VIRTUALREGISTERS; i++) + { + JITDUMP(" %i ", i + 1); + } + + JITDUMP("OVF\n"); + + for (i = 0; i < lvaTrackedCount; i++) + { + if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i)) + { + JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i); + + for (j = 0; j <= FP_VIRTUALREGISTERS; j++) + { + JITDUMP("%5u ", raHeightsNonWeightedStackFP[i][j]); + } + JITDUMP("\n"); + } + } + JITDUMP("--------------------------------------------------------\n"); +} +#endif + +// Increases heights for tracked variables given in mask. We call this +// function when we enregister a variable and will cause the heights to +// shift one place to the right. 
+void Compiler::raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask) +{ + assert(VarSetOps::IsSubset(this, mask, optAllFloatVars)); + + for (unsigned i = 0; i < lvaTrackedCount; i++) + { + if (VarSetOps::IsMember(this, mask, i)) + { + for (unsigned j = FP_VIRTUALREGISTERS; j > 0; j--) + { + raHeightsStackFP[i][j] = raHeightsStackFP[i][j - 1]; + +#ifdef DEBUG + raHeightsNonWeightedStackFP[i][j] = raHeightsNonWeightedStackFP[i][j - 1]; +#endif + } + + raHeightsStackFP[i][0] = 0; +#ifdef DEBUG + raHeightsNonWeightedStackFP[i][0] = 0; +#endif + } + } + +#ifdef DEBUG + raDumpHeightsStackFP(); +#endif +} + +// This is the prepass we do to adjust refcounts across calls and +// create the height structure. +void Compiler::raEnregisterVarsPrePassStackFP() +{ + BasicBlock* block; + + assert(!VarSetOps::IsEmpty(this, optAllFloatVars)); + + // Initialization of the height table + memset(raHeightsStackFP, 0, sizeof(raHeightsStackFP)); + + // Initialization of the payload table + memset(raPayloadStackFP, 0, sizeof(raPayloadStackFP)); + +#ifdef DEBUG + memset(raHeightsNonWeightedStackFP, 0, sizeof(raHeightsStackFP)); +#endif + + // We will have a quick table with the pointers to the interesting varDscs + // so that we don't have to scan for them for each tree. + unsigned FPVars[lclMAX_TRACKED]; + unsigned numFPVars = 0; + for (unsigned i = 0; i < lvaTrackedCount; i++) + { + if (VarSetOps::IsMember(this, optAllFloatVars, i)) + { + FPVars[numFPVars++] = i; + } + } + + assert(numFPVars == VarSetOps::Count(this, optAllFloatVars)); + + // Things we check here: + // + // We substract 2 for each FP variable that's live across a call, as we will + // have 2 memory accesses to spill and unpsill around it. 
+ // + // + // + VARSET_TP VARSET_INIT_NOCOPY(blockLiveOutFloats, VarSetOps::MakeEmpty(this)); + for (block = fgFirstBB; block; block = block->bbNext) + { + compCurBB = block; + /* + This opt fails in the case of a variable that has it's entire lifetime contained in the 'then' of + a qmark. The use mask for the whole qmark won't contain that variable as it variable's value comes + from a def in the else, and the def can't be set for the qmark if the else side of + the qmark doesn't do a def. + + See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be + 'smart' again in the future + + + if (((block->bbVarUse | + block->bbVarDef | + block->bbLiveIn ) & optAllFloatVars) == 0) + { + // Fast way out + continue; + } + */ + VarSetOps::Assign(this, blockLiveOutFloats, block->bbLiveOut); + VarSetOps::IntersectionD(this, blockLiveOutFloats, optAllFloatVars); + if (!VarSetOps::IsEmpty(this, blockLiveOutFloats)) + { + // See comment in compiler.h above declaration of compMayHaveTransitionBlocks + // to understand the reason for this limitation of FP optimizer. 
            // NOTE(review): this span is the tail of a larger routine (the FP-enregistration
            // pre-pass); the enclosing function's declaration lies above this excerpt. The code
            // below (1) checks jump kinds that can require x87 "transition blocks" and
            // (2) walks every tree, tracking liveness and accumulating per-variable FP
            // stack-height spill costs used later for enregistration costing.
            switch (block->bbJumpKind)
            {
                case BBJ_COND:
                {
                    // The GT_JTRUE must be the last statement of the block.
                    GenTreePtr stmt;
                    stmt = block->bbTreeList->gtPrev;
                    assert(stmt->gtNext == NULL && stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);

                    assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
                    GenTreePtr cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;

                    assert(cond->OperIsCompare());

                    if (cond->gtOp.gtOp1->TypeGet() == TYP_LONG)
                    {
                        if (compHndBBtabCount > 0)
                        {
                            // If we have any handlers we won't enregister whatever is live out of this block
                            JITDUMP("PERF Warning: Taking out FP candidates due to transition blocks + exception "
                                    "handlers.\n");
                            VarSetOps::UnionD(this, raMaskDontEnregFloat,
                                              VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
                        }
                        else
                        {
                            // long conditional jumps can generate transition blocks
                            compMayHaveTransitionBlocks = true;
                        }
                    }

                    break;
                }
                case BBJ_SWITCH:
                {
                    if (compHndBBtabCount > 0)
                    {
                        // If we have any handlers we won't enregister whatever is live out of this block
                        JITDUMP(
                            "PERF Warning: Taking out FP candidates due to transition blocks + exception handlers.\n");
                        VarSetOps::UnionD(this, raMaskDontEnregFloat,
                                          VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
                    }
                    else
                    {
                        // fp vars are live out of the switch, so we may have transition blocks
                        compMayHaveTransitionBlocks = true;
                    }
                    break;
                    // NOTE(review): this 'default' label sits *inside* the BBJ_SWITCH brace pair.
                    // That is legal C++ (a case/default label may appear anywhere directly inside
                    // the switch body), and behavior is unchanged, but it reads oddly. Preserved
                    // byte-identical.
                    default:
                        break;
                }
            }
        }

        // Second part of the per-block walk: track liveness tree-by-tree, adjust
        // ref-count heuristics, and record FP stack-height spill costs.
        VARSET_TP VARSET_INIT(this, liveSet, block->bbLiveIn);
        for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
        {
            assert(stmt->gtOper == GT_STMT);

            unsigned prevHeight = stmt->gtStmt.gtStmtList->gtFPlvl;
            for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
            {
                VarSetOps::AssignNoCopy(this, liveSet, fgUpdateLiveSet(liveSet, tree));
                switch (tree->gtOper)
                {
                    case GT_CALL:
                        // Charge all FP vars live across the call twice the block weight
                        // as extra enregistration payload (spill + reload around the call).
                        raAddPayloadStackFP(liveSet, block->getBBWeight(this) * 2);
                        break;
                    case GT_CAST:
                        // For cast from long local var to double, decrement the ref count of the long
                        // to avoid store forwarding stall
                        if (tree->gtType == TYP_DOUBLE)
                        {
                            GenTreePtr op1 = tree->gtOp.gtOp1;
                            if (op1->gtOper == GT_LCL_VAR && op1->gtType == TYP_LONG)
                            {
                                unsigned int lclNum = op1->gtLclVarCommon.gtLclNum;
                                assert(lclNum < lvaCount);
                                LclVarDsc*   varDsc          = lvaTable + lclNum;
                                unsigned int weightedRefCnt  = varDsc->lvRefCntWtd;
                                unsigned int refCntDecrement = 2 * block->getBBWeight(this);
                                // Clamp at zero rather than underflowing the unsigned count.
                                if (refCntDecrement > weightedRefCnt)
                                {
                                    varDsc->lvRefCntWtd = 0;
                                }
                                else
                                {
                                    varDsc->lvRefCntWtd = weightedRefCnt - refCntDecrement;
                                }
                            }
                        }
                        break;
                    default:
                        break;
                }

                // Update heights
                unsigned height = tree->gtFPlvl;

                if (height != prevHeight)
                {
                    if (height > prevHeight && height < FP_VIRTUALREGISTERS)
                    {
                        for (unsigned i = 0; i < numFPVars; i++)
                        {
                            if (VarSetOps::IsMember(this, liveSet, FPVars[i]))
                            {
                                // The -1 are because we don't care about stack height 0
                                // and we will use offset FP_VIRTUALREGISTERS to know what's
                                // the count when we overflow. we multiply by 2, because that
                                // is the number of memory accesses we will do for each spill
                                // (even if we op directly with the spill)
                                if (compCodeOpt() == SMALL_CODE)
                                {
                                    raHeightsStackFP[FPVars[i]][height - 1] += 2;
                                }
                                else
                                {
                                    raHeightsStackFP[FPVars[i]][height - 1] += 2 * block->getBBWeight(this);
                                }

#ifdef DEBUG
                                raHeightsNonWeightedStackFP[FPVars[i]][height - 1]++;
#endif
                            }
                        }
                    }

                    prevHeight = height;
                }
            }
        }
    }
    compCurBB = NULL;

    if (compJmpOpUsed)
    {
        // Disable enregistering of FP vars for methods with jmp op. We have really no
        // coverage here.
        // The problem with FP enreg vars is that the returning block is marked with having
        // all variables live on exit. This works for integer vars, but for FP vars we must
        // do the work to unload them. This is fairly straightforward to do, but I'm worried
        // by the coverage, so I'll take the conservative approach of disabling FP enregistering
        // and we will fix it if there is demand
        JITDUMP("PERF Warning: Disabling FP enregistering due to JMP op!!!!!!!.\n");
        VarSetOps::UnionD(this, raMaskDontEnregFloat, optAllFloatVars);
    }

#ifdef DEBUG
    raDumpHeightsStackFP();
#endif
}

//------------------------------------------------------------------------
// raSetRegLclBirthDeath: if the given GT_LCL_VAR refers to a local chosen
//    for x87 FP enregistration, rewrite the node in place as a GT_REG_VAR,
//    propagating its liveness flags and marking a register birth when the
//    node is a def that is not also a death.
//
// Arguments:
//    tree      - the GT_LCL_VAR node to (possibly) rewrite
//    lastlife  - live-variable set at this node
//                (NOTE(review): not referenced in this function body)
//    fromLDOBJ - caller-context flag
//                (NOTE(review): not referenced in this function body)
//
// Notes:
//    Returns early (leaving the node untouched) for untracked locals and for
//    locals not in optAllFPregVars.
//
void Compiler::raSetRegLclBirthDeath(GenTreePtr tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ)
{
    assert(tree->gtOper == GT_LCL_VAR);

    unsigned lclnum = tree->gtLclVarCommon.gtLclNum;
    assert(lclnum < lvaCount);

    LclVarDsc* varDsc = lvaTable + lclnum;

    if (!varDsc->lvTracked)
    {
        // Not tracked, can't be one of the enreg fp vars
        return;
    }

    unsigned varIndex = varDsc->lvVarIndex;

    if (!VarSetOps::IsMember(this, optAllFPregVars, varIndex))
    {
        // Not one of the enreg fp vars
        return;
    }

    assert(varDsc->lvRegNum != REG_FPNONE);
    assert(!VarSetOps::IsMember(this, raMaskDontEnregFloat, varIndex));

    // Morph the node into a GT_REG_VAR, preserving its liveness bits.
    unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
    tree->ChangeOper(GT_REG_VAR);
    tree->gtFlags |= livenessFlags;
    tree->gtRegNum          = varDsc->lvRegNum;
    tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
    tree->gtRegVar.SetLclNum(lclnum);

    // A liveset can change in a lclvar even if the lclvar itself is not
    // changing its life. This can happen for lclvars inside qmarks,
    // where lclvars die across the colon edge.
    // SO, either
    //      it is marked GTF_VAR_DEATH (already set by fgComputeLife)
    //      OR it is already live
    //      OR it is becoming live
    //
    if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
    {
        if ((tree->gtFlags & GTF_VAR_DEF) != 0)
        {
            tree->gtFlags |= GTF_REG_BIRTH;
        }
    }

#ifdef DEBUG
    if (verbose)
        gtDispTree(tree);
#endif
}

// In this pass we set the regvars and set the birth and death flags. We do it
// for all enregistered variables at once.
//------------------------------------------------------------------------
// raEnregisterVarsPostPassStackFP: walk every tree of every block and rewrite
//    GT_LCL_VAR nodes of FP-enregistered locals as GT_REG_VAR nodes (via
//    raSetRegLclBirthDeath), verifying that the tracked liveness matches each
//    block's bbLiveOut at the end.
//
void Compiler::raEnregisterVarsPostPassStackFP()
{
    if (VarSetOps::IsEmpty(this, optAllFPregVars))
    {
        // Nothing to fix up.
        // NOTE(review): there is no 'return' here, so the walk below still runs.
        // It is a functional no-op in that case (raSetRegLclBirthDeath bails out
        // for every local), just wasted work. Preserved as-is.
    }

    BasicBlock* block;

    JITDUMP("raEnregisterVarsPostPassStackFP:\n");

    for (block = fgFirstBB; block; block = block->bbNext)
    {
        compCurBB = block;

        /*
        This opt fails in the case of a variable that has it's entire lifetime contained in the 'then' of
        a qmark. The use mask for the whole qmark won't contain that variable as it variable's value comes
        from  a def in the else, and the def can't be set for the qmark if the else side of
        the qmark doesn't do a def.

        See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
        'smart' again in the future


        if (((block->bbVarUse |
              block->bbVarDef |
              block->bbLiveIn   ) & optAllFPregVars) == 0)
        {
            // Fast way out
            continue;
        }
        */

        VARSET_TP VARSET_INIT(this, lastlife, block->bbLiveIn);
        for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
        {
            assert(stmt->gtOper == GT_STMT);

            // Note: the liveness update sits in the loop-increment (comma expression),
            // so each node is processed against the live set *before* its own effect.
            for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree;
                 VarSetOps::AssignNoCopy(this, lastlife, fgUpdateLiveSet(lastlife, tree)), tree = tree->gtNext)
            {
                if (tree->gtOper == GT_LCL_VAR)
                {
                    raSetRegLclBirthDeath(tree, lastlife, false);
                }
            }
        }
        assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut));
    }
    compCurBB = NULL;
}

//------------------------------------------------------------------------
// raGenerateFPRefCounts: accumulate stack-access ref counts for doubles (and
//    double-aligned structs) that did NOT get enregistered, into
//    raCntStkStackFP / raCntStkParamDblStackFP / raCntWtdStkDblStackFP.
//    The three counters must be zero on entry.
//
void Compiler::raGenerateFPRefCounts()
{
    // Update ref counts to stack
    assert(raCntWtdStkDblStackFP == 0);
    assert(raCntStkParamDblStackFP == 0);
    assert(raCntStkStackFP == 0);

    LclVarDsc* varDsc;
    unsigned   lclNum;
    for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
    {
        if (varDsc->lvType == TYP_DOUBLE ||
            varDsc->lvStructDoubleAlign) // Account for structs (A bit over aggressive here, we should
                                         // account for field accesses, but should be a reasonable
                                         // heuristic).
        {
            if (varDsc->lvRegister)
            {
                assert(varDsc->lvTracked);
            }
            else
            {
                // Increment tmp access
                raCntStkStackFP += varDsc->lvRefCnt;

                if (varDsc->lvIsParam)
                {
                    // Why is this not weighted?
                    raCntStkParamDblStackFP += varDsc->lvRefCnt;
                }
                else
                {
                    raCntWtdStkDblStackFP += varDsc->lvRefCntWtd;
                }
            }
        }
    }

#ifdef DEBUG
    if ((raCntWtdStkDblStackFP > 0) || (raCntStkParamDblStackFP > 0))
    {
        JITDUMP("StackFP double stack weighted ref count: %u ; param ref count: %u\n", raCntWtdStkDblStackFP,
                raCntStkParamDblStackFP);
    }
#endif
}

//------------------------------------------------------------------------
// raEnregisterVarsStackFP: top-level driver for x87 FP enregistration.
//    Runs the pre-pass (weights + height tables), builds the candidate table,
//    greedily picks candidates by estimated benefit, assigns each a virtual
//    FP register (reserving FPV0/FPV1 as temps), records interferences, and
//    finally runs the post-pass that rewrites trees as regvars.
//
void Compiler::raEnregisterVarsStackFP()
{
    const int          FPENREGTHRESHOLD          = 1;
    const unsigned int FPENREGTHRESHOLD_WEIGHTED = FPENREGTHRESHOLD;

    // Do init
    raInitStackFP();

    if (opts.compDbgCode || opts.MinOpts())
    {
        // no enregistering for these options.
        return;
    }

    if (VarSetOps::IsEmpty(this, optAllFloatVars))
    {
        // No floating point vars. bail out
        return;
    }

    // Do additional pass updating weights and generating height table
    raEnregisterVarsPrePassStackFP();

    // Vars are ordered by weight
    LclVarDsc* varDsc;

    // Set an interference with V0 and V1, which we reserve as a temp registers.
    // We need only one temp. but we will take the easy way, as by using
    // two, we will need to teach codegen how to operate with spilled variables
    VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV0], optAllFloatVars);
    VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV1], optAllFloatVars);

#ifdef DEBUG
    if (codeGen->genStressFloat())
    {
        // Lock out registers for stress.
        regMaskTP locked = codeGen->genStressLockedMaskFloat();
        for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
        {
            if (locked & genRegMaskFloat((regNumber)i))
            {
                VarSetOps::Assign(this, raLclRegIntfFloat[i], optAllFloatVars);
            }
        }
    }
#endif

    // Build the interesting FP var table
    LclVarDsc* fpLclFPVars[lclMAX_TRACKED];
    unsigned   numFPVars = 0;
    for (unsigned i = 0; i < lvaTrackedCount; i++)
    {
        if (VarSetOps::IsMember(this, raMaskDontEnregFloat, i))
        {
            JITDUMP("Won't enregister V%02i (T%02i) because it's marked as dont enregister\n", lvaTrackedToVarNum[i],
                    i);
            continue;
        }

        if (VarSetOps::IsMember(this, optAllFloatVars, i))
        {
            varDsc = lvaTable + lvaTrackedToVarNum[i];

            assert(varDsc->lvTracked);

            if (varDsc->lvDoNotEnregister)
            {
                JITDUMP("Won't enregister V%02i (T%02i) because it's marked as DoNotEnregister\n",
                        lvaTrackedToVarNum[i], i);
                continue;
            }
#if !FEATURE_X87_DOUBLES
            if (varDsc->TypeGet() == TYP_FLOAT)
            {
                JITDUMP("Won't enregister V%02i (T%02i) because it's a TYP_FLOAT and we have disabled "
                        "FEATURE_X87_DOUBLES\n",
                        lvaTrackedToVarNum[i], i);
                continue;
            }
#endif

            fpLclFPVars[numFPVars++] = lvaTable + lvaTrackedToVarNum[i];
        }
    }

    unsigned maxRegVars = 0; // Max num of regvars at one time

    // Greedy selection loop: each iteration moves the "best" remaining candidate
    // to position sortNum, then decides whether to enregister it.
    for (unsigned sortNum = 0; sortNum < numFPVars; sortNum++)
    {
#ifdef DEBUG
        {
            JITDUMP("\n");
            JITDUMP("FP regvar candidates:\n");

            for (unsigned i = sortNum; i < numFPVars; i++)
            {
                varDsc          = fpLclFPVars[i];
                unsigned lclNum = varDsc - lvaTable;
                unsigned varIndex;
                varIndex = varDsc->lvVarIndex;

                JITDUMP("V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n", lclNum, varIndex,
                        varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
                        raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
            }
            JITDUMP("\n");
        }
#endif

        unsigned min = sortNum;

        // Find the one that will save us most
        // NOTE(review): the comparison is against the fixed pivot fpLclFPVars[sortNum],
        // not fpLclFPVars[min], so this selects the LAST candidate that beats the pivot
        // rather than the true maximum. Looks like a latent selection-sort bug, but the
        // behavior is long-standing; preserved byte-identical.
        for (unsigned i = sortNum + 1; i < numFPVars; i++)
        {
            if (raVarIsGreaterValueStackFP(fpLclFPVars[i], fpLclFPVars[sortNum]))
            {
                min = i;
            }
        }

        // Put it at the top of the array
        LclVarDsc* temp;
        temp                 = fpLclFPVars[min];
        fpLclFPVars[min]     = fpLclFPVars[sortNum];
        fpLclFPVars[sortNum] = temp;

        varDsc = fpLclFPVars[sortNum];

#ifdef DEBUG
        // lclNum is only consumed by JITDUMP/printf below, which compile away in release.
        unsigned lclNum = varDsc - lvaTable;
#endif
        unsigned varIndex = varDsc->lvVarIndex;

        assert(VarSetOps::IsMember(this, optAllFloatVars, varIndex));

        JITDUMP("Candidate for enregistering: V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n",
                lclNum, varIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
                raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);

        bool bMeetsThreshold = true;

        if (varDsc->lvRefCnt < FPENREGTHRESHOLD || varDsc->lvRefCntWtd < FPENREGTHRESHOLD_WEIGHTED)
        {
            bMeetsThreshold = false;
        }

        // We don't want to enregister arguments with only one use, as they will be
        // loaded in the prolog. Just don't enregister them and load them lazily.
        if (varDsc->lvIsParam &&
            (varDsc->lvRefCnt <= FPENREGTHRESHOLD || varDsc->lvRefCntWtd <= FPENREGTHRESHOLD_WEIGHTED))
        {
            bMeetsThreshold = false;
        }

        if (!bMeetsThreshold
#ifdef DEBUG
            && codeGen->genStressFloat() != 1
#endif
            )
        {
            // Doesn't meet bar, do next
            JITDUMP("V%02u/T%02u doesnt meet threshold. Won't enregister\n", lclNum, varIndex);
            continue;
        }

        // We don't want to have problems with overflow (we now have 2 unsigned counters
        // that can possibly go to their limits), so we just promote to double here.
        // Net benefit = weighted refs minus enregistration payload, spill-height cost,
        // and the weighted threshold.
        double balance =
            double(varDsc->lvRefCntWtd) -
            double(raPayloadStackFP[varIndex]) -                      // Additional costs of enregistering variable
            double(raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]) - // Spilling costs of enregistering variable
            double(FPENREGTHRESHOLD_WEIGHTED);

        JITDUMP("balance = %d - %d - %d - %d\n", varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
                raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS], FPENREGTHRESHOLD_WEIGHTED);

        if (balance < 0.0
#ifdef DEBUG
            && codeGen->genStressFloat() != 1
#endif
            )
        {
            // Doesn't meet bar, do next
            JITDUMP("V%02u/T%02u doesnt meet threshold. Won't enregister\n", lclNum, varIndex);
            continue;
        }

        regNumber reg = raRegForVarStackFP(varDsc->lvVarIndex);
        if (reg == REG_FPNONE)
        {
            // Didn't make if (interferes with other regvars), do next
            JITDUMP("V%02u/T%02u interferes with other enreg vars. Won't enregister\n", lclNum, varIndex);

            continue;
        }

        if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
        {
            // Do not enregister if this is a floating field in a struct local of
            // promotion type PROMOTION_TYPE_DEPENDENT.
            continue;
        }

        // Yipee, we will enregister var.
        varDsc->lvRegister = true;
        varDsc->lvRegNum   = reg;
        VarSetOps::AddElemD(this, optAllFPregVars, varIndex);

#ifdef DEBUG
        raDumpVariableRegIntfFloat();

        if (verbose)
        {
            printf("; ");
            gtDispLclVar(lclNum);
            printf("V%02u/T%02u (refcnt=%2u,refwtd=%4u%s) enregistered in %s\n", varIndex, varDsc->lvVarIndex,
                   varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2, (varDsc->lvRefCntWtd & 1) ? ".5" : "",
                   CodeGen::regVarNameStackFP(varDsc->lvRegNum));
        }

        JITDUMP("\n");
#endif

        // Create interferences with other variables.
        assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[(int)reg], optAllFloatVars)));
        VARSET_TP VARSET_INIT_NOCOPY(intfFloats, VarSetOps::Intersection(this, lvaVarIntf[varIndex], optAllFloatVars));

        VarSetOps::UnionD(this, raLclRegIntfFloat[reg], intfFloats);

        // Update height tables for variables that interfere with this one.
        raUpdateHeightsForVarsStackFP(intfFloats);

        // Update max number of reg vars at once.
        // NOTE(review): a local variable named 'min' is in scope here, so these
        // 'min'/'max' must be the macro versions for this to compile — TODO confirm.
        maxRegVars = min(REG_FPCOUNT, max(maxRegVars, VarSetOps::Count(this, intfFloats)));
    }

    assert(VarSetOps::IsSubset(this, optAllFPregVars, optAllFloatVars));
    assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, optAllFPregVars, raMaskDontEnregFloat)));

    // This is a bit conservative, as they may not all go through a call.
    // If we have to, we can fix this.
    tmpDoubleSpillMax += maxRegVars;

    // Do pass marking trees as regvars
    raEnregisterVarsPostPassStackFP();

#ifdef DEBUG
    {
        JITDUMP("FP enregistration summary\n");

        unsigned i;
        for (i = 0; i < numFPVars; i++)
        {
            varDsc = fpLclFPVars[i];

            if (varDsc->lvRegister)
            {
                unsigned lclNum = varDsc - lvaTable;
                unsigned varIndex;
                varIndex = varDsc->lvVarIndex;

                JITDUMP("Enregistered V%02u/T%02u in FPV%i RefCount: %u Weight: %u \n", lclNum, varIndex,
                        varDsc->lvRegNum, varDsc->lvRefCnt, varDsc->lvRefCntWtd);
            }
        }
        JITDUMP("End of FP enregistration summary\n\n");
    }
#endif
}

#ifdef DEBUG

//------------------------------------------------------------------------
// genStressLockedMaskFloat: under FP stress modes, return the mask of virtual
//    FP registers to lock out of allocation. Never includes FPV0/FPV1 (the
//    reserved temps); mode 1 locks FPV4-FPV7, other modes lock FPV2-FPV7.
//
regMaskTP CodeGenInterface::genStressLockedMaskFloat()
{
    assert(genStressFloat());

    // Don't use REG_FPV0 or REG_FPV1, they're reserved
    if (genStressFloat() == 1)
    {
        return genRegMaskFloat(REG_FPV4) | genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) |
               genRegMaskFloat(REG_FPV7);
    }
    else
    {
        return genRegMaskFloat(REG_FPV2) | genRegMaskFloat(REG_FPV3) | genRegMaskFloat(REG_FPV4) |
               genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) | genRegMaskFloat(REG_FPV7);
    }
}

#endif

#endif // FEATURE_STACK_FP_X87
+#endif // LEGACY_BACKEND |