From 4b11dc566a5bbfa1378d6266525c281b028abcc8 Mon Sep 17 00:00:00 2001 From: Jiyoung Yun Date: Fri, 10 Feb 2017 20:35:12 +0900 Subject: Imported Upstream version 1.0.0.9910 --- src/jit/codegencommon.cpp | 151 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 126 insertions(+), 25 deletions(-) (limited to 'src/jit/codegencommon.cpp') diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 240911523f..b1e474b755 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -2475,6 +2475,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind EJ_jle, // GT_LE EJ_jge, // GT_GE EJ_jg, // GT_GT +#ifndef LEGACY_BACKEND + EJ_je, // GT_TEST_EQ + EJ_jne, // GT_TEST_NE +#endif #elif defined(_TARGET_ARMARCH_) EJ_eq, // GT_EQ EJ_ne, // GT_NE @@ -2494,6 +2498,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind EJ_jbe, // GT_LE EJ_jae, // GT_GE EJ_ja, // GT_GT +#ifndef LEGACY_BACKEND + EJ_je, // GT_TEST_EQ + EJ_jne, // GT_TEST_NE +#endif #elif defined(_TARGET_ARMARCH_) EJ_eq, // GT_EQ EJ_ne, // GT_NE @@ -2513,6 +2521,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind EJ_NONE, // GT_LE EJ_jns, // GT_GE (S == 0) EJ_NONE, // GT_GT +#ifndef LEGACY_BACKEND + EJ_NONE, // GT_TEST_EQ + EJ_NONE, // GT_TEST_NE +#endif #elif defined(_TARGET_ARMARCH_) EJ_eq, // GT_EQ (Z == 1) EJ_ne, // GT_NE (Z == 0) @@ -2530,6 +2542,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle); assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge); assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg); +#ifndef LEGACY_BACKEND + assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je); + assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne); +#endif assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je); assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne); @@ -2537,6 +2553,10 @@ emitJumpKind 
CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe); assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae); assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja); +#ifndef LEGACY_BACKEND + assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je); + assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne); +#endif assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je); assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne); @@ -3145,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) /* Check our max stack level. Needed for fgAddCodeRef(). We need to relax the assert as our estimation won't include code-gen stack changes (which we know don't affect fgAddCodeRef()) */ - noway_assert(getEmitter()->emitMaxStackDepth <= - (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments. - compiler->compHndBBtabCount + // Return address for locally-called finallys - genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc - (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args + { + unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments. + compiler->compHndBBtabCount + // Return address for locally-called finallys + genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc + (compiler->compTailCallUsed ? 
4 : 0); // CORINFO_HELP_TAILCALL args +#if defined(UNIX_X86_ABI) + maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding #endif + noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth); + } +#endif // EMIT_TRACK_STACK_DEPTH *nativeSizeOfCode = codeSize; compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize; @@ -10241,6 +10266,66 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // Look in CodeGenArm64.cpp +#elif defined(_TARGET_X86_) + +/***************************************************************************** + * + * Generates code for an EH funclet prolog. + */ + +void CodeGen::genFuncletProlog(BasicBlock* block) +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFuncletProlog()\n"); + } +#endif + + ScopedSetVariable _setGeneratingProlog(&compiler->compGeneratingProlog, true); + + compiler->unwindBegProlog(); + + // TODO Save callee-saved registers + + // This is the end of the OS-reported prolog for purposes of unwinding + compiler->unwindEndProlog(); +} + +/***************************************************************************** + * + * Generates code for an EH funclet epilog. + */ + +void CodeGen::genFuncletEpilog() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In genFuncletEpilog()\n"); + } +#endif + + ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); + + // TODO Restore callee-saved registers + + instGen_Return(0); +} + +/***************************************************************************** + * + * Capture the information used to generate the funclet prologs and epilogs. 
+ */ + +void CodeGen::genCaptureFuncletPrologEpilogInfo() +{ + if (!compiler->ehAnyFunclets()) + { + return; + } +} + #else // _TARGET_* /***************************************************************************** @@ -10583,6 +10668,7 @@ GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTree // funclet frames: this will be FuncletInfo.fiSpDelta. void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) { + genVzeroupperIfNeeded(false); regMaskTP regMask = compiler->compCalleeFPRegsSavedMask; // Only callee saved floating point registers should be in regMask @@ -10621,16 +10707,6 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize) offset -= XMM_REGSIZE_BYTES; } } - -#ifdef FEATURE_AVX_SUPPORT - // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs. - // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is - // using SSE2. - if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX) - { - instGen(INS_vzeroupper); - } -#endif } // Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working @@ -10651,6 +10727,7 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) // fast path return if (regMask == RBM_NONE) { + genVzeroupperIfNeeded(); return; } @@ -10682,16 +10759,6 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) assert((offset % 16) == 0); #endif // _TARGET_AMD64_ -#ifdef FEATURE_AVX_SUPPORT - // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs. - // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is - // using SSE2. 
- if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX) - { - instGen(INS_vzeroupper); - } -#endif - for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg)) { regMaskTP regBit = genRegMask(reg); @@ -10706,7 +10773,41 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize) offset -= XMM_REGSIZE_BYTES; } } + genVzeroupperIfNeeded(); } + +// Generate Vzeroupper instruction as needed to zero out the upper 128 bits of all YMM registers so that the +// AVX/Legacy SSE transition penalties can be avoided. This function is used in genPreserveCalleeSavedFltRegs +// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in Prolog if the method contains +// 128-bit or 256-bit AVX code, to avoid legacy SSE to AVX transition penalty, which could happen when native +// code contains legacy SSE code calling into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in Epilog +// if the method contains 256-bit AVX code, to avoid AVX to legacy SSE transition penalty. +// +// Params +// check256bitOnly - true to check if the function contains 256-bit AVX instruction and generate Vzeroupper +// instruction, false to check if the function contains AVX instruction (either 128-bit or 256-bit). +// +void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/) +{ +#ifdef FEATURE_AVX_SUPPORT + bool emitVzeroUpper = false; + if (check256bitOnly) + { + emitVzeroUpper = getEmitter()->Contains256bitAVX(); + } + else + { + emitVzeroUpper = getEmitter()->ContainsAVX(); + } + + if (emitVzeroUpper) + { + assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX); + instGen(INS_vzeroupper); + } +#endif +} + #endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87 //----------------------------------------------------------------------------------- -- cgit v1.2.3