// // Copyright (c) Microsoft. All rights reserved. // Licensed under the MIT license. See LICENSE file in the project root for full license information. // /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XX XX XX emitX86.cpp XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #include "jitpch.h" #ifdef _MSC_VER #pragma hdrstop #endif #if defined(_TARGET_XARCH_) /*****************************************************************************/ /*****************************************************************************/ #include "instr.h" #include "emit.h" #include "codegen.h" bool IsSSE2Instruction(instruction ins) { return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION); } bool IsSSEOrAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION); #else // !FEATURE_AVX_SUPPORT return IsSSE2Instruction(ins); #endif // !FEATURE_AVX_SUPPORT } bool emitter::IsAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT return (UseAVX() && IsSSEOrAVXInstruction(ins)); #else return false; #endif } #define REX_PREFIX_MASK 0xFF00000000LL #ifdef FEATURE_AVX_SUPPORT // Returns true if the AVX instruction is a binary operator that requires 3 operands. // When we emit an instruction with only two operands, we will duplicate the destination // as a source. // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to // be formalized by adding an additional field to instruction table to // to indicate whether a 3-operand instruction. bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins) { return IsAVXInstruction(ins) && ( ins == INS_cvtsi2ss || ins == INS_cvtsi2sd || ins == INS_cvtss2sd || ins == INS_cvtsd2ss || ins == INS_addss || ins == INS_addsd || ins == INS_subss || ins == INS_subsd || ins == INS_mulss || ins == INS_mulsd || ins == INS_divss || ins == INS_divsd || ins == INS_addps || ins == INS_addpd || ins == INS_subps || ins == INS_subpd || ins == INS_mulps || ins == INS_mulpd || ins == INS_cmpps || ins == INS_cmppd || ins == INS_andps || ins == INS_andpd || ins == INS_orps || ins == INS_orpd || ins == INS_xorps || ins == INS_xorpd || ins == INS_dpps || ins == INS_dppd || ins == INS_haddpd || ins == INS_por || ins == INS_pand || ins == INS_pandn || ins == INS_pcmpeqd || ins == INS_pcmpgtd || ins == INS_pcmpeqw || ins == INS_pcmpgtw || ins == INS_pcmpeqb || ins == INS_pcmpgtb || ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_pmullw || ins == INS_shufps || ins == INS_shufpd || ins == INS_minps || ins == INS_minss || ins == INS_minpd || ins == INS_minsd || ins == INS_divps || ins == INS_divpd || ins == INS_maxps || ins == INS_maxpd || ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb || ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw || ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub || ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 ); } // Returns true if the AVX instruction is a move operator that requires 3 operands. 
// When we emit an instruction with only two operands, we will duplicate the source // register in the vvvv field. This is because these merge sources into the dest. // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to // be formalized by adding an additional field to the instruction table to // indicate whether it is a 3-operand instruction. bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins) { return IsAVXInstruction(ins) && ( ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss ); } #endif // Returns true if the AVX instruction is a 4-byte opcode. // Note that this should be true for any of the instructions in instrsXArch.h // that use the SSE38 or SSE3A macro. // TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this // needs to be addressed by expanding instruction encodings. bool Is4ByteAVXInstruction(instruction ins) { #ifdef FEATURE_AVX_SUPPORT return ( ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw || ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 || ins == INS_pmulld ); #else return false; #endif } #ifdef FEATURE_AVX_SUPPORT // Returns true if this instruction requires a VEX prefix. // All AVX instructions require a VEX prefix. bool emitter::TakesVexPrefix(instruction ins) { // Special case: vzeroupper requires a 2-byte VEX prefix if (ins == INS_vzeroupper) { return false; } return IsAVXInstruction(ins); } // Add base VEX prefix without setting W, R, X, or B bits // L bit will be set based on emitter attr. // // 3-byte VEX prefix = C4 // - R, X, B, W - bits to express corresponding REX prefixes // - m-mmmmm (5-bit) // 0-00001 - implied leading 0F opcode byte // 0-00010 - implied leading 0F 38 opcode bytes // 0-00011 - implied leading 0F 3A opcode bytes // Rest - reserved for future use; using them will result in an Undefined Instruction exception // // - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused // - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1) // - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix // these prefixes are treated as mandatory when used with escape opcode 0Fh for // some SIMD instructions // 00 - None (0F - packed float) // 01 - 66 (66 0F - packed double) // 10 - F3 (F3 0F - scalar float) // 11 - F2 (F2 0F - scalar double) // // TODO-AMD64-CQ: for simplicity of implementation this routine always adds the 3-byte VEX // prefix. Based on the 'attr' param we could add the 2-byte VEX prefix in the case of scalar // and AVX-128 bit operations. #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr) { // Only AVX instructions require a VEX prefix assert(IsAVXInstruction(ins)); // Shouldn't have already added a VEX prefix assert(!hasVexPrefix(code)); // Set the L bit to 1 for instructions that operate on 256 bits.
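// As a worked illustration of the constants above (decoded byte by byte):
//   C4 - 3-byte VEX escape byte
//   E0 - 1110 0000: R=1, X=1, B=1 (bit-inverted, so no extended registers yet),
//        m-mmmmm = 00000 (the implied-leading-byte field is filled in later
//        from the opcode's escape bytes)
//   78 - 0111 1000: W=0, vvvv=1111 (bit-inverted 'unused'), L=0, pp=00
// OR-ing in LBIT_IN_3BYTE_VEX_PREFIX changes that last byte to 7C, i.e. L=1 (256-bit).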
code |= DEFAULT_3BYTE_VEX_PREFIX; if (attr == EA_32BYTE) { code |= LBIT_IN_3BYTE_VEX_PREFIX; } return code; } #endif // FEATURE_AVX_SUPPORT // Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix bool TakesRexWPrefix(instruction ins, emitAttr attr) { #ifdef _TARGET_AMD64_ // movsx should always sign extend out to 8 bytes just because we don't track // whether the dest should be 4 bytes or 8 bytes (attr indicates the size // of the source, not the dest). // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special // cased here. // // rex_jmp (jmp with a REX prefix) always requires the REX.W prefix. if (ins == INS_movsx || ins == INS_rex_jmp) { return true; } if (EA_SIZE(attr) != EA_8BYTE) { return false; } if (IsSSEOrAVXInstruction(ins)) { if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm) { return true; } return false; } // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these // opcodes... // These are all the instructions that default to 8-byte operand without the REX.W bit // With 1 special case: movzx, because the 4-byte version still zeros out the hi 4 bytes, // so we never need it if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) && (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg))) { return true; } else { return false; } #else //!_TARGET_AMD64 = _TARGET_X86_ return false; #endif //!_TARGET_AMD64_ } // Returns true if using this register will require a REX.* prefix. // Since XMM registers overlap with YMM registers, this routine // can also be used to determine whether a YMM register is being used, if the // instruction in question is AVX. bool IsExtendedReg(regNumber reg) { #ifdef _TARGET_AMD64_ return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15)); #else // X86 JIT operates in 32-bit mode and hence extended regs are not available. return false; #endif } // Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix bool IsExtendedReg(regNumber reg, emitAttr attr) { #ifdef _TARGET_AMD64_ // Not a register, so doesn't need a prefix if (reg > REG_XMM15) return false; // The opcode field only has 3 bits for the register; these high registers // need a 4th bit, which comes from the REX prefix (either REX.X, REX.R, or REX.B) if (IsExtendedReg(reg)) return true; if (EA_SIZE(attr) != EA_1BYTE) return false; // There are 12 one-byte registers addressable 'below' r8b: // al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil. // The first 4 are always addressable; the last 8 are divided into 2 sets: // ah, ch, dh, bh // -- or -- // spl, bpl, sil, dil // Both sets are encoded exactly the same; the difference is the presence // of a REX prefix, even a REX prefix with no other bits set (0x40). // So in order to get to the second set we need a REX prefix (but no bits). // // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of // tracking/encoding registers. return (reg >= REG_RSP); #else // X86 JIT operates in 32-bit mode and hence extended regs are not available. return false; #endif } // Amd64: Since XMM registers overlap with YMM registers, this routine // can also be used to determine whether a YMM register is being used, in the case of AVX instructions.
// // X86: we have XMM0-XMM7 available but this routine cannot be used to // determine whether a reg is XMM because they share the same reg numbers // with integer registers. Hence always return false. bool IsXMMReg(regNumber reg) { #ifdef _TARGET_AMD64_ return (reg >= REG_XMM0) && (reg <= REG_XMM15); #else return false; #endif } // Returns bits to be encoded in instruction for the given register. regNumber RegEncoding(regNumber reg) { #ifdef _TARGET_AMD64_ // Amd64: XMM registers do not share the same reg numbers as integer registers. // But register encoding of integer and XMM registers is the same. // Therefore, subtract XMMBASE from regNumber to get the register encoding // in case of XMM registers. return (regNumber)((IsXMMReg(reg) ? reg-XMMBASE : reg) & 0x7); #else // X86: XMM registers share the same reg numbers as integer registers and // hence nothing to do to get reg encoding. return (regNumber) (reg & 0x7); #endif } // Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes // SSE2: separate 1-byte prefix gets added before opcode. // AVX: specific bits within VEX prefix need to be set in bit-inverted form. size_t emitter::AddRexWPrefix(instruction ins, size_t code) { #ifdef _TARGET_AMD64_ if (UseAVX() && IsAVXInstruction(ins)) { // W-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); // W-bit is the only bit that is added in non bit-inverted form. return code | 0x00008000000000ULL; } return code | 0x4800000000ULL; #else assert(!"UNREACHED"); return code; #endif } #ifdef _TARGET_AMD64_ size_t emitter::AddRexRPrefix(instruction ins, size_t code) { if (UseAVX() && IsAVXInstruction(ins)) { // Right now support 3-byte VEX prefix assert(hasVexPrefix(code)); // R-bit is added in bit-inverted form. return code & 0xFF7FFFFFFFFFFFULL; } return code | 0x4400000000ULL; } size_t emitter::AddRexXPrefix(instruction ins, size_t code) { if (UseAVX() && IsAVXInstruction(ins)) { //Right now support 3-byte VEX prefix assert(hasVexPrefix(code)); // X-bit is added in bit-inverted form. return code & 0xFFBFFFFFFFFFFFULL; } return code | 0x4200000000ULL; } size_t emitter::AddRexBPrefix(instruction ins, size_t code) { if (UseAVX() && IsAVXInstruction(ins)) { // Right now support 3-byte VEX prefix assert(hasVexPrefix(code)); // B-bit is added in bit-inverted form. return code & 0xFFDFFFFFFFFFFFULL; } return code | 0x4100000000ULL; } // Adds REX prefix (0x40) without W, R, X or B bits set size_t emitter::AddRexPrefix(instruction ins, size_t code) { assert(!UseAVX() || !IsAVXInstruction(ins)); return code | 0x4000000000ULL; } bool isPrefix(BYTE b) { assert(b != 0); // Caller should check this assert(b != 0x67); // We don't use the address size prefix assert(b != 0x65); // The GS segment override prefix is emitted separately assert(b != 0x64); // The FS segment override prefix is emitted separately assert(b != 0xF0); // The lock prefix is emitted separately assert(b != 0x2E); // We don't use the CS segment override prefix assert(b != 0x3E); // Or the DS segment override prefix assert(b != 0x26); // Or the ES segment override prefix assert(b != 0x36); // Or the SS segment override prefix // That just leaves the size prefixes used in SSE opcodes: // Scalar Double Scalar Single Packed Double return ((b == 0xF2) || (b == 0xF3) || (b == 0x66)); } #endif //_TARGET_AMD64_ // Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. 
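// As a sketch of what the routine below does, take cvtsi2sd, whose SSE
// encoding is F2 0F 2A (used here as an assumed example): under VEX, the F2
// size prefix is folded into the prefix as pp=11 and the 0F escape byte as
// m-mmmmm=00001, so only the final opcode byte (2A) is left to emit after
// the 3-byte VEX prefix.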
unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t & code) { #ifdef _TARGET_AMD64_ if (hasVexPrefix(code)) { // Only AVX instructions should have a VEX prefix assert(UseAVX() && IsAVXInstruction(ins)); size_t vexPrefix = (code >> 32) & 0x00FFFFFF; code &= 0x00000000FFFFFFFFLL; WORD leadingBytes = 0; BYTE check = (code >> 24) & 0xFF; if (check != 0) { // 3-byte opcode: with the bytes ordered as 0x2211RM33 or // 4-byte opcode: with the bytes ordered as 0x22114433 // check for a prefix in the 11 position BYTE sizePrefix = (code >> 16) & 0xFF; if (sizePrefix != 0 && isPrefix(sizePrefix)) { // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits // // 00 - None (0F - packed float) // 01 - 66 (66 0F - packed double) // 10 - F3 (F3 0F - scalar float // 11 - F2 (F2 0F - scalar double) switch (sizePrefix) { case 0x66: vexPrefix |= 0x01; break; case 0xF3: vexPrefix |= 0x02; break; case 0xF2: vexPrefix |= 0x03; break; default: assert(!"unrecognized SIMD size prefix"); unreached(); } // Now the byte in the 22 position must be an escape byte 0F leadingBytes = check; assert(leadingBytes == 0x0F); // Get rid of both sizePrefix and escape byte code &= 0x0000FFFFLL; // Check the byte in the 33 position to see if it is 3A or 38. // In such a case escape bytes must be 0x0F3A or 0x0F38 check = code & 0xFF; if (check == 0x3A || check == 0x38) { leadingBytes = (leadingBytes << 8) | check; code &= 0x0000FF00LL; } } } else { // 2-byte opcode with the bytes ordered as 0x0011RM22 // the byte in position 11 must be an escape byte. leadingBytes = (code >> 16) & 0xFF; assert(leadingBytes == 0x0F || leadingBytes == 0x00); code &= 0xFFFF; } // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these // implied leading bytes switch (leadingBytes) { case 0x00: // there is no leading byte break; case 0x0F: vexPrefix |= 0x0100; break; case 0x0F38: vexPrefix |= 0x0200; break; case 0x0F3A: vexPrefix |= 0x0300; break; default: assert(!"encountered unknown leading bytes"); unreached(); } // At this point // VEX.2211RM33 got transformed as VEX.0000RM33 // VEX.0011RM22 got transformed as VEX.0000RM22 // // Now output VEX prefix leaving the 4-byte opcode emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); emitOutputByte(dst+1, ((vexPrefix >> 8) & 0xFF)); emitOutputByte(dst+2, vexPrefix & 0xFF); return 3; } else if (code > 0x00FFFFFFFFLL) { BYTE prefix = (code >> 32) & 0xFF; noway_assert(prefix >= 0x40 && prefix <= 0x4F); code &= 0x00000000FFFFFFFFLL; // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now) // we can remove this code as well // The REX prefix is required to come after all other prefixes. // Some of our 'opcodes' actually include some prefixes, if that // is the case, shift them over and place the REX prefix after // the other prefixes, and emit any prefix that got moved out. 
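// For example (illustrative): if the stored opcode includes a 66 size prefix,
// as in 66 0F 7E (movd), and a pending REX byte 0x48, the bytes must be emitted
// as 66 48 0F 7E - legacy prefix first, then REX immediately before the escape
// byte and opcode - never 48 66 0F 7E.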
BYTE check = (code >> 24) & 0xFF; if (check == 0) { // 3-byte opcode: with the bytes ordered as 0x00113322 // check for a prefix in the 11 position check = (code >> 16) & 0xFF; if (check != 0 && isPrefix(check)) { // Swap the rex prefix and whatever this prefix is code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL)); // and then emit the other prefix return emitOutputByte(dst, check); } } else { // 4-byte opcode with the bytes ordered as 0x22114433 // first check for a prefix in the 11 position BYTE check2 = (code >> 16) & 0xFF; if (isPrefix(check2)) { assert(!isPrefix(check)); // We currently don't use this, so it is untested if (isPrefix(check)) { // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX // Change to c2rrc1XXXX, and emit check2 now code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL)); } else { // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX (check is part of the opcode) // Change to c2XXrrXXXX, and emit check2 now code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL)); } return emitOutputByte(dst, check2); } } return emitOutputByte(dst, prefix); } #endif //_TARGET_AMD64_ return 0; } #ifdef _TARGET_AMD64_ /***************************************************************************** * Is the last instruction emitted a call instruction? */ bool emitter::emitIsLastInsCall() { if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call)) return true; return false; } /***************************************************************************** * We're about to create an epilog. If the last instruction we output was a 'call', * then we need to insert a NOP, to allow for proper exception-handling behavior. */ void emitter::emitOutputPreEpilogNOP() { if (emitIsLastInsCall()) { emitIns(INS_nop); } } #endif //_TARGET_AMD64_ // Size of rex prefix in bytes unsigned emitter::emitGetRexPrefixSize(instruction ins) { // For AVX instructions, the REX bits are part of the VEX prefix, // and hence require no additional byte to encode. if (IsAVXInstruction(ins)) return 0; // If not AVX, then we need 1 byte to encode the REX prefix. return 1; } // Size of vex prefix in bytes unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) { // TODO-XArch-CQ: right now we default to the 3-byte VEX prefix. There is // scope for a size win by using the 2-byte VEX prefix for some of the // scalar, AVX-128 and most common AVX-256 instructions. if (IsAVXInstruction(ins)) return 3; // If not AVX, then we don't need to encode a VEX prefix. return 0; } // The VEX prefix encodes some bytes of the opcode and, as a result, the overall size of the instruction is reduced. // Therefore, estimating the size by adding the VEX prefix size and the size of the instruction opcode bytes will always overestimate it. // Instead, this routine adjusts the size of the VEX prefix based on the number of opcode bytes it encodes, so that the // instruction size estimate is accurate. // Basically this function will decrease the vexPrefixSize, // so that opcodeSize + vexPrefixAdjustedSize will be the right size.
// rightOpcodeSize + vexPrefixSize //=(opcodeSize - ExtraBytesSize) + vexPrefixSize //=opcodeSize + (vexPrefixSize - ExtraBytesSize) //=opcodeSize + vexPrefixAdjustedSize unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code) { #ifdef _TARGET_AMD64_ if (IsAVXInstruction(ins)) { unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); // Currently the VEX prefix size is hard-coded as 3 bytes; // in future we should support the 2-byte VEX prefix. assert(vexPrefixAdjustedSize == 3); // In this case, the opcode will contain at least one escape-prefix byte, // so vexPrefixAdjustedSize is decremented by one. vexPrefixAdjustedSize -= 1; // Get the fourth byte of the opcode. // If this byte is non-zero, then we should check whether the opcode contains a SIMD prefix. BYTE check = (code >> 24) & 0xFF; if (check != 0) { // 3-byte opcode: with the bytes ordered as 0x2211RM33 or // 4-byte opcode: with the bytes ordered as 0x22114433 // The SIMD prefix, if present, is in the 11 position. BYTE sizePrefix = (code >> 16) & 0xFF; if (sizePrefix != 0 && isPrefix(sizePrefix)) { vexPrefixAdjustedSize -= 1; } // If the opcode size is 4 bytes, then the second escape prefix is in the fourth byte of the opcode. // But in that case the opcode size does not yet count the ModR/M byte: // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR/MSize //=opcodeSize + VexPrefixAdjustedSize -1 + 1 //=opcodeSize + VexPrefixAdjustedSize // So although we may have a second escape-prefix byte, we don't decrease vexPrefixAdjustedSize. } return vexPrefixAdjustedSize; } #endif return 0; } // Get the size of the REX or VEX prefix emitted in the code unsigned emitter::emitGetPrefixSize(size_t code) { #ifdef FEATURE_AVX_SUPPORT if (code & VEX_PREFIX_MASK_3BYTE) { return 3; } else #endif if (code & REX_PREFIX_MASK) { return 1; } return 0; } #ifdef _TARGET_X86_ /***************************************************************************** * * Record a non-empty stack */ void emitter::emitMarkStackLvl(unsigned stackLevel) { assert(int(stackLevel) >= 0); assert(emitCurStackLvl == 0); assert(emitCurIG->igStkLvl == 0); assert(emitCurIGfreeNext == emitCurIGfreeBase); assert(stackLevel && stackLevel % sizeof(int) == 0); emitCurStackLvl = emitCurIG->igStkLvl = stackLevel; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } #endif /***************************************************************************** * * Get hold of the address mode displacement value for an indirect call. */ inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id) { if (id->idIsLargeCall()) { return ((instrDescCGCA*)id)->idcDisp; } else { assert(!id->idIsLargeDsp()); assert(!id->idIsLargeCns()); return id->idAddr()->iiaAddrMode.amDisp; } } /***************************************************************************** * * The following table is used by the instIsFP()/instUse/DefFlags() helpers. */ #define INST_DEF_FL 0x20 // does the instruction set flags? #define INST_USE_FL 0x40 // does the instruction use flags?
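// For example (assuming the rf/wf columns in instrs.h are set accordingly):
// an entry for INS_add would carry only INST_DEF_FL, since add writes the
// flags without reading them, while INS_adc would carry both INST_USE_FL and
// INST_DEF_FL, since adc consumes the carry flag as well as producing flags.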
const BYTE CodeGenInterface::instInfo[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST1(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST2(id, nm, fp, um, rf, wf, mr, mi ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp), #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; /***************************************************************************** * * Initialize the table used by emitInsModeFormat(). */ const BYTE emitter::emitInsModeFmtTab[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) um, #define INST1(id, nm, fp, um, rf, wf, mr ) um, #define INST2(id, nm, fp, um, rf, wf, mr, mi ) um, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) um, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) um, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; #ifdef DEBUG unsigned const emitter::emitInsModeFmtCnt = sizeof(emitInsModeFmtTab)/ sizeof(emitInsModeFmtTab[0]); #endif /***************************************************************************** * * Combine the given base format with the update mode of the instruction. */ inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base) { assert(IF_RRD + IUM_RD == IF_RRD); assert(IF_RRD + IUM_WR == IF_RWR); assert(IF_RRD + IUM_RW == IF_RRW); return (insFormat)(base + emitInsUpdateMode(ins)); } /***************************************************************************** * * A version of emitInsModeFormat() that handles X87 floating-point instructions. */ #if FEATURE_STACK_FP_X87 emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst) { if (CodeGen::instIsFP(ins)) { assert(IF_TRD_SRD + 1 == IF_TWR_SRD); assert(IF_TRD_SRD + 2 == IF_TRW_SRD); assert(IF_TRD_MRD + 1 == IF_TWR_MRD); assert(IF_TRD_MRD + 2 == IF_TRW_MRD); assert(IF_TRD_ARD + 1 == IF_TWR_ARD); assert(IF_TRD_ARD + 2 == IF_TRW_ARD); switch (ins) { case INS_fst: case INS_fstp: case INS_fistp: case INS_fistpl: return (insFormat)(FPst ); case INS_fld: case INS_fild: return (insFormat)(FPld+1); case INS_fcomp: case INS_fcompp: case INS_fcomip: return (insFormat)(FPld ); default: return (insFormat)(FPld+2); } } else { return emitInsModeFormat(ins, base); } } #endif // FEATURE_STACK_FP_X87 // This is a helper we need due to VS Whidbey #254016 in order to determine // whether we cannot possibly be updating an integer register. This is not the best // solution, but the other ones (see bug) are going to be much more complicated. // The issue here is that on x86, the XMM registers use the same register numbers // as the general purpose registers, so we need to distinguish them. // We really only need this for x86 where this issue exists. bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) { instruction ins = id->idIns(); // The following SSE2 instructions write to a general purpose integer register.
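// (For instance, cvttsd2si truncates a double held in an XMM register and
// writes the integer result to a general purpose register, so it updates an
// integer register even though it is an SSE2 instruction.)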
if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si #ifdef _TARGET_AMD64_ || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si #endif // _TARGET_AMD64_ ) { return false; } return true; } /***************************************************************************** * * Returns the base encoding of the given CPU instruction. */ inline size_t insCode(instruction ins) { const static size_t insCodes[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) mr, #define INST1(id, nm, fp, um, rf, wf, mr ) mr, #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodes)/sizeof(insCodes[0])); assert((insCodes[ins] != BAD_CODE)); return insCodes[ins]; } /***************************************************************************** * * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction. */ inline size_t insCodeMI(instruction ins) { const static size_t insCodesMI[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesMI)/sizeof(insCodesMI[0])); assert((insCodesMI[ins] != BAD_CODE)); return insCodesMI[ins]; } /***************************************************************************** * * Returns the "reg, [r/m]" encoding of the given CPU instruction. */ inline size_t insCodeRM(instruction ins) { const static size_t insCodesRM[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesRM)/sizeof(insCodesRM[0])); assert((insCodesRM[ins] != BAD_CODE)); return insCodesRM[ins]; } /***************************************************************************** * * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction. */ inline size_t insCodeACC(instruction ins) { const static size_t insCodesACC[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) a4, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesACC)/sizeof(insCodesACC[0])); assert((insCodesACC[ins] != BAD_CODE)); return insCodesACC[ins]; } /***************************************************************************** * * Returns the "register" encoding of the given CPU instruction. 
*/ inline size_t insCodeRR(instruction ins) { const static size_t insCodesRR[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) #define INST2(id, nm, fp, um, rf, wf, mr, mi ) #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; assert((unsigned)ins < sizeof(insCodesRR)/sizeof(insCodesRR[0])); assert((insCodesRR[ins] != BAD_CODE)); return insCodesRR[ins]; } const static size_t insCodesMR[] = { #define INST0(id, nm, fp, um, rf, wf, mr ) #define INST1(id, nm, fp, um, rf, wf, mr ) mr, #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr, #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr, #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr, #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr, #include "instrs.h" #undef INST0 #undef INST1 #undef INST2 #undef INST3 #undef INST4 #undef INST5 }; // Returns true iff the given CPU instruction has an MR encoding. inline size_t hasCodeMR(instruction ins) { assert((unsigned)ins < sizeof(insCodesMR)/sizeof(insCodesMR[0])); return((insCodesMR[ins] != BAD_CODE)); } /***************************************************************************** * * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction. */ inline size_t insCodeMR(instruction ins) { assert((unsigned)ins < sizeof(insCodesMR)/sizeof(insCodesMR[0])); assert((insCodesMR[ins] != BAD_CODE)); return insCodesMR[ins]; } /***************************************************************************** * * Returns an encoding for the specified register to be used in the bit0-2 * part of an opcode. */ inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code) { assert(reg < REG_STK); #ifndef LEGACY_BACKEND #ifdef _TARGET_AMD64_ // Either code is not NULL or reg is not an extended reg. // If reg is an extended reg, the instruction needs to be prefixed with 'REX', // which would require code != NULL. assert(code != nullptr || !IsExtendedReg(reg)); if (IsExtendedReg(reg)) { *code = AddRexBPrefix(ins, *code); // REX.B } else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) { // We are assuming that we only use/encode SPL, BPL, SIL and DIL, // not the corresponding AH, CH, DH, or BH *code = AddRexPrefix(ins, *code); // REX } #endif // _TARGET_AMD64_ reg = RegEncoding(reg); assert(reg < 8); return reg; #else // LEGACY_BACKEND assert(reg < 8); return reg; #endif // LEGACY_BACKEND } /***************************************************************************** * * Returns an encoding for the specified register to be used in the bit3-5 * part of an opcode. */ inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code) { assert(reg < REG_STK); #ifndef LEGACY_BACKEND #ifdef _TARGET_AMD64_ // Either code is not NULL or reg is not an extended reg. // If reg is an extended reg, the instruction needs to be prefixed with 'REX', // which would require code != NULL.
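// (Illustrative example: encoding ECX - register value 1 - in the reg field
// contributes 1 << 3 = 0x08 to the ModRM byte, whereas R10 additionally needs
// REX.R, since the fourth bit of its encoding lives in the REX prefix.)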
assert(code != nullptr || !IsExtendedReg(reg)); if (IsExtendedReg(reg)) { *code = AddRexRPrefix(ins, *code); // REX.R } else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr)) { // We are assuming that we only use/encode SPL, BPL, SIL and DIL // not the corresponding AH, CH, DH, or BH *code = AddRexPrefix(ins, *code); // REX } #endif // _TARGET_AMD64_ reg = RegEncoding(reg); #ifdef _TARGET_AMD64_ assert(reg < REG_R8); #endif // !_TARGET_AMD64_ return(reg<< 3); #else // LEGACY_BACKEND assert(reg < 8); return(reg<< 3); #endif // LEGACY_BACKEND } /*********************************************************************************** * * Returns modified AVX opcode with the specified register encoded in bits 3-6 of * byte 2 of VEX prefix. */ inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code) { #ifdef FEATURE_AVX_SUPPORT assert(reg < REG_STK); assert(IsAVXInstruction(ins)); assert(hasVexPrefix(code)); // Get 4-bit register encoding // RegEncoding() gives lower 3 bits // IsExtendedReg() gives MSB. size_t regBits = RegEncoding(reg); if (IsExtendedReg(reg)) { regBits |= 0x08; } // VEX prefix encodes register operand in 1's complement form // Shift count = 4-bytes of opcode + 0-2 bits assert(regBits <= 0xF); regBits <<= 35; return code ^ regBits; #else return code; #endif } /***************************************************************************** * * Returns an encoding for the specified register to be used in the bit3-5 * part of an SIB byte (unshifted). * Used exclusively to generate the REX.X bit and truncate the register. */ inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code) { assert(reg < REG_STK); #ifdef _TARGET_AMD64_ // Either code is not NULL or reg is not an extended reg. // If reg is an extended reg, instruction needs to be prefixed with 'REX' // which would require code != NULL. assert(code != NULL || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8)); if (IsExtendedReg(reg)) { *code = AddRexXPrefix(ins, *code); // REX.X } reg = RegEncoding(reg); #endif assert(reg < 8); return reg; } /***************************************************************************** * * Returns the "[r/m]" opcode with the mod/RM field set to register. */ inline size_t emitter::insEncodeMRreg(instruction ins, size_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. if ((code & 0xFF00) == 0) { assert((code & 0xC000) == 0); code |= 0xC000; } return code; } /***************************************************************************** * * Returns the "[r/m], icon" opcode with the mod/RM field set to register. */ inline size_t insEncodeMIreg(instruction ins, size_t code) { assert((code & 0xC000) == 0); code |= 0xC000; return code; } /***************************************************************************** * * Returns the given "[r/m]" opcode with the mod/RM field set to register. */ inline size_t insEncodeRMreg(instruction ins, size_t code) { // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding. if ((code & 0xFF00) == 0) { assert((code & 0xC000) == 0); code |= 0xC000; } return code; } /***************************************************************************** * * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to * the given register. 
*/ inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code) { assert((code & 0xC000) == 0); code |= 0xC000; unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8; code |= regcode; return code; } /***************************************************************************** * * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to * the given register. */ inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code) { assert((code & 0xC000) == 0); code |= 0xC000; unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8; code |= regcode; return code; } /***************************************************************************** * * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a * "reg,reg,imm8" form. */ inline bool insNeedsRRIb(instruction ins) { // If this list gets longer, use a switch or a table. return ins == INS_imul; } /***************************************************************************** * * Returns the "reg,reg,imm8" opcode with both reg fields set to * the given register. */ inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size) { assert(size == EA_4BYTE); // All we handle for now. assert(insNeedsRRIb(ins)); // If this list gets longer, use a switch, or a table lookup. size_t code = 0x69c0; unsigned regcode = insEncodeReg012(ins, reg, size, &code); // We use the same register as source and destination. (Could have another version that takes both regs...) code |= regcode; code |= (regcode << 3); return code; } /***************************************************************************** * * Returns the "+reg" opcode with the given register set into the low * nibble of the opcode. */ inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size) { size_t code = insCodeRR(ins); unsigned regcode = insEncodeReg012(ins, reg, size, &code); code |= regcode; return code; } /***************************************************************************** * * Return the 'SS' field value for the given index scale factor.
*/ inline unsigned insSSval(unsigned scale) { assert(scale == 1 || scale == 2 || scale == 4 || scale == 8); const static BYTE scales[] = { 0x00, // 1 0x40, // 2 0xFF, // 3 0x80, // 4 0xFF, // 5 0xFF, // 6 0xFF, // 7 0xC0, // 8 }; return scales[scale-1]; } const instruction emitJumpKindInstructions[] = { INS_nop, #define JMP_SMALL(en, rev, ins) INS_##ins, #include "emitjmps.h" INS_call }; const emitJumpKind emitReverseJumpKinds[] = { EJ_NONE, #define JMP_SMALL(en, rev, ins) EJ_##rev, #include "emitjmps.h" }; /***************************************************************************** * Look up the instruction for a jump kind */ /*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind) { assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions)); return emitJumpKindInstructions[jumpKind]; } /***************************************************************************** * Reverse the conditional jump */ /* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind) { assert(jumpKind < EJ_COUNT); return emitReverseJumpKinds[jumpKind]; } /***************************************************************************** * The size for these instructions is less than EA_4BYTE, * but the target register need not be byte-addressable */ inline bool emitInstHasNoCode(instruction ins) { if (ins == INS_align) return true; return false; } /***************************************************************************** * When encoding instructions that operate on byte registers * we have to ensure that we use a low register (EAX, EBX, ECX or EDX) * otherwise we will incorrectly encode the instruction */ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */) { #if CPU_HAS_BYTE_REGS if (size != EA_1BYTE) // Not operating on a byte register is fine { return true; } if ((ins != INS_movsx) && // These two instructions support high register (ins != INS_movzx) ) // encodings for reg1 { // reg1 must be a byte-able register if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0) { return false; } } // if reg2 is not REG_NA then reg2 must be a byte-able register if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)) { return false; } #endif // The instruction can be encoded return true; } /***************************************************************************** * * Estimate the size (in bytes of generated code) of the given instruction. */ inline UNATIVE_OFFSET emitter::emitInsSize(size_t code) { UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; #ifdef _TARGET_AMD64_ size += emitGetPrefixSize(code); #endif return size; } inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins) { return emitInsSize(insCodeRM(ins)); } inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr) { emitAttr size = EA_SIZE(attr); UNATIVE_OFFSET sz; #ifdef _TARGET_AMD64_ // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. // This would probably be better expressed as a different format or something? 
if (insCodeRM(ins) & 0xFF00) { sz = 5; } else #endif // _TARGET_AMD64 { size_t code = insCodeRM(ins); sz = emitInsSize(insEncodeRMreg(ins, code)); } // Most 16-bit operand instructions will need a prefix if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) { sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins)); // REX prefix if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) { sz += emitGetRexPrefixSize(ins); } return sz; } /*****************************************************************************/ inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp) { UNATIVE_OFFSET size = emitInsSize(code); UNATIVE_OFFSET offs; bool offsIsUpperBound = true; bool EBPbased = true; /* Is this a temporary? */ if (var < 0) { /* An address off of ESP takes an extra byte */ if (!emitHasFramePtr) size++; #ifndef LEGACY_BACKEND // The offset is already assigned. Find the temp. TempDsc* tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_USED); if (tmp == nullptr) { // It might be in the free lists, if we're working on zero initializing the temps. tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_FREE); } assert(tmp != nullptr); offs = tmp->tdTempOffs(); // We only care about the magnitude of the offset here, to determine instruction size. if (emitComp->isFramePointerUsed()) { if ((int)offs < 0) { offs = -(int)offs; } } else { // SP-based offsets must already be positive. assert((int)offs >= 0); } #else // LEGACY_BACKEND /* We'll have to estimate the max. possible offset of this temp */ // TODO: Get an estimate of the temp offset instead of assuming // TODO: that any temp may be at the max. temp offset!!!!!!!!!! if (emitComp->lvaTempsHaveLargerOffsetThanVars()) { offs = emitLclSize + emitMaxTmpSize; } else { offs = emitMaxTmpSize; } offsIsUpperBound = false; #endif // LEGACY_BACKEND } else { /* Get the frame offset of the (non-temp) variable */ offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased); /* An address off of ESP takes an extra byte */ if (!EBPbased) { ++size; } /* Is this a stack parameter reference? */ if ( emitComp->lvaIsParameter (var) #if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) && !emitComp->lvaIsRegArgument(var) #endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI ) { /* If no EBP frame, arguments are off of ESP, above temps */ if (!EBPbased) { assert((int)offs >= 0); offsIsUpperBound = false; // since #temps can increase offs += emitMaxTmpSize; } } else { /* Locals off of EBP are at negative offsets */ if (EBPbased) { #ifdef _TARGET_AMD64_ // If localloc is not used, then ebp chaining is done and hence // offset of locals will be at negative offsets, Otherwise offsets // will be positive. In future, when RBP gets positioned in the // middle of the frame so as to optimize instruction encoding size, // the below asserts needs to be modified appropriately. if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC) { noway_assert((int)offs >= 0); } else #endif { // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes #ifdef UNIX_AMD64_ABI LclVarDsc* varDsc = emitComp->lvaTable + var; bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg; // Register passed args could have a stack offset of 0. 
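// (Under the UNIX_AMD64_ABI convention assumed here, a parameter passed in a
// register may have been homed at frame offset 0, so the "negative offset"
// invariant checked below is relaxed for such args.)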
noway_assert((int)offs < 0 || isRegPassedArg); #else // !UNIX_AMD64_ABI noway_assert((int)offs < 0); #endif // !UNIX_AMD64_ABI } assert(emitComp->lvaTempsHaveLargerOffsetThanVars()); // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar || unsigned(var) == emitComp->lvaStubArgumentVar) { offs -= emitMaxTmpSize; } if ((int) offs < 0) { // offset is negative return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof( int)); } #ifdef _TARGET_AMD64_ // This case arises for localloc frames else { return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int)); } #endif } if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false) { offs += emitMaxTmpSize; } } } assert((int)offs >= 0); #if !FEATURE_FIXED_OUT_ARGS /* Are we addressing off of ESP? */ if (!emitHasFramePtr) { /* Adjust the effective offset if necessary */ if (emitCntStackDepth) offs += emitCurStackLvl; // we could (and used to) check for the special case [sp] here but the stack offset // estimator was off, and there is very little harm in overestimating for such a // rare case. } #endif // !FEATURE_FIXED_OUT_ARGS // printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n", // emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs); #ifdef _TARGET_AMD64_ bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX); #else bool useSmallEncoding = (offs <= size_t(SCHAR_MAX)); #endif #ifdef LEGACY_BACKEND /* If we are using a small encoding, there is a danger that we might end up having to use a larger encoding. Record 'offs' so that we can detect if such a situation occurs */ if (useSmallEncoding && !offsIsUpperBound) { if (emitGrowableMaxByteOffs < offs) { emitGrowableMaxByteOffs = offs; #ifdef DEBUG // Remember which instruction this is emitMaxByteOffsIdNum = emitInsCount; #endif } } #endif // LEGACY_BACKEND // If it is ESP based, and the offset is zero, we will not encode the disp part. if (!EBPbased && offs == 0) { return size; } else { return size + (useSmallEncoding ? sizeof(char) : sizeof( int)); } } inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); UNATIVE_OFFSET prefix = 0; bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc()); #endif // _TARGET_AMD64_ if (valSize > sizeof(int)) { valSize = sizeof(int); } #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte assert(valSize == sizeof(int)); } #endif if (valInByte) { valSize = sizeof(char); } // 16-bit operand instructions need a prefix. // This referes to 66h size prefix override if (id->idOpSize() == EA_2BYTE) { prefix = 1; } return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp); } /*****************************************************************************/ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code) { emitAttr attrSize = id->idOpSize(); instruction ins = id->idIns(); /* The displacement field is in an unusual place for calls */ ssize_t dsp = (ins == INS_call) ? 
emitGetInsCIdisp(id) : emitGetInsAmdAny(id); bool dspInByte = ((signed char)dsp == (ssize_t)dsp); bool dspIsZero = (dsp == 0); UNATIVE_OFFSET size; // Note that the values in reg and rgx are used in this method to decide // how many bytes will be needed by the address [reg+rgx+cns] // this includes the prefix bytes when reg or rgx are registers R8-R15 regNumber reg; regNumber rgx; // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD // ideally these should really be the only idInsFmts that we see here // but we have some outliers to deal with: // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD and calls emitInsSizeAM switch (id->idInsFmt()) { case IF_RWR_LABEL: case IF_MRW_CNS: case IF_MRW_RRD: reg = REG_NA; rgx = REG_NA; break; default: reg = id->idAddr()->iiaAddrMode.amBaseReg; rgx = id->idAddr()->iiaAddrMode.amIndxReg; break; } #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { dspInByte = false; // relocs can't be placed in a byte dspIsZero = false; // relocs won't always be zero } #endif if (code & 0xFF000000) { size = 4; } else if(code & 0x00FF0000) { assert( (attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 || (attrSize == EA_16BYTE) // only for x64 || (ins == INS_movzx) || (ins == INS_movsx)); size = 3; } else { size = 2; // Most 16-bit operands will require a size prefix . // This refers to 66h size prefix override. #if FEATURE_STACK_FP_X87 if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw)) #else // FEATURE_STACK_FP_X87 if (attrSize == EA_2BYTE) #endif // FEATURE_STACK_FP_X87 { size++; } } #ifdef _TARGET_AMD64_ size += emitGetVexPrefixAdjustedSize(ins, attrSize, code); if (code & REX_PREFIX_MASK) { // REX prefix size += emitGetRexPrefixSize(ins); } else if (TakesRexWPrefix(ins, attrSize)) { // REX.W prefix size += emitGetRexPrefixSize(ins); } else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) || IsExtendedReg(id->idReg1(), attrSize)) { // Should have a REX byte size += emitGetRexPrefixSize(ins); } #endif // _TARGET_AMD64_ if (rgx == REG_NA) { /* The address is of the form "[reg+disp]" */ switch (reg) { case REG_NA: /* The address is of the form "[disp]" */ size += sizeof(INT32); #ifdef _TARGET_AMD64_ // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32 if (!id->idIsDspReloc()) { size++; } #endif return size; case REG_EBP: AMD64_ONLY(case REG_R13:) break; case REG_ESP: AMD64_ONLY(case REG_R12:) size++; __fallthrough; default: if (dspIsZero) return size; } /* Does the offset fit in a byte? */ if (dspInByte) size += sizeof(char); else size += sizeof(INT32); } else { /* An index register is present */ size++; /* Is the index value scaled? */ if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1) { /* Is there a base register? 
*/ if (reg != REG_NA) { /* The address is "[reg + {2/4/8} * rgx + icon]" */ if (dspIsZero && reg != REG_EBP AMD64_ONLY( && reg != REG_R13)) { /* The address is "[reg + {2/4/8} * rgx]" */ } else { /* The address is "[reg + {2/4/8} * rgx + disp]" */ if (dspInByte) size += sizeof(char); else size += sizeof(int ); } } else { /* The address is "[{2/4/8} * rgx + icon]" */ size += sizeof(INT32); } } else { if (dspIsZero && ((reg == REG_EBP) AMD64_ONLY( || (reg == REG_R13))) && ((rgx != REG_EBP) AMD64_ONLY( && (rgx != REG_R13)))) { /* Swap reg and rgx, such that reg is not EBP/R13 */ regNumber tmp = reg; id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx; id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp; } /* The address is "[reg+rgx+dsp]" */ if (dspIsZero && reg != REG_EBP AMD64_ONLY( && reg != REG_R13)) { /* This is [reg+rgx]" */ } else { /* This is [reg+rgx+dsp]" */ if (dspInByte) size += sizeof(char); else size += sizeof(int ); } } } return size; } inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc()); #endif // _TARGET_AMD64_ if (valSize > sizeof(INT32)) { valSize = sizeof(INT32); } #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte assert(valSize == sizeof(INT32)); } #endif if (valInByte) { valSize = sizeof(char); } return valSize + emitInsSizeAM(id, code); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code) { instruction ins = id->idIns(); // fgMorph changes any statics that won't fit into 32-bit addresses // into constants with an indir, rather than GT_CLS_VAR // so we should only hit this path for statics that are RIP-relative UNATIVE_OFFSET size = sizeof(INT32); // Most 16-bit operand instructions will need a prefix. // This refers to 66h size prefix override. if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx) { size++; } return size + emitInsSize(code); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val) { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifndef _TARGET_AMD64_ // occasionally longs get here on x86 if (valSize > sizeof(INT32)) valSize = sizeof(INT32); #endif // !_TARGET_AMD64_ #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { valInByte = false; // relocs can't be placed in a byte assert(valSize == sizeof(INT32)); } #endif if (valInByte) { valSize = sizeof(char); } return valSize + emitInsSizeCV(id, code); } /***************************************************************************** * * Allocate instruction descriptors for instructions with address modes. 
*/ inline emitter::instrDesc* emitter::emitNewInstrAmd (emitAttr size, ssize_t dsp) { if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX) { instrDescAmd* id = emitAllocInstrAmd (size); id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idaAmdVal = dsp; return id; } else { instrDesc* id = emitAllocInstr (size); id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits return id; } } /***************************************************************************** * * Set the displacement field in an instruction. Only handles instrDescAmd type. */ inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp) { if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX) { id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idaAmdVal = dsp; } else { id->idSetIsSmallDsp(); id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits } } /***************************************************************************** * * Allocate an instruction descriptor for an instruction that uses both * an address mode displacement and a constant. */ emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns) { if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX) { if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS) { instrDesc* id = emitAllocInstr (size); id->idSmallCns(cns); id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits return id; } else { instrDescCns* id = emitAllocInstrCns (size); id->idSetIsLargeCns(); id->idcCnsVal = cns; id->idAddr()->iiaAddrMode.amDisp = dsp; assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits return id; } } else { if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS) { instrDescAmd* id = emitAllocInstrAmd (size); id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idaAmdVal = dsp; id->idSmallCns(cns); return id; } else { instrDescCnsAmd* id = emitAllocInstrCnsAmd(size); id->idSetIsLargeCns(); id->idacCnsVal = cns; id->idSetIsLargeDsp(); #ifdef DEBUG id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL; #endif id->idacAmdVal = dsp; return id; } } } /***************************************************************************** * * The next instruction will be a loop head entry point, * so insert a dummy instruction here to ensure that * the x86 I-cache alignment rule is followed. */ void emitter::emitLoopAlign() { /* Insert a pseudo-instruction to ensure that we align the next instruction properly */ instrDesc* id = emitNewInstrTiny(EA_1BYTE); id->idIns(INS_align); id->idCodeSize(15); // We may need to skip up to 15 bytes of code emitCurIGsize += 15; } /***************************************************************************** * * Add a NOP instruction of the given size. */ void emitter::emitIns_Nop(unsigned size) { assert(size <= 15); instrDesc* id = emitNewInstr(); id->idIns(INS_nop); id->idInsFmt(IF_NONE); id->idCodeSize(size); dispIns(id); emitCurIGsize += size; } /***************************************************************************** * * Add an instruction with no operands.
*/ #ifdef DEBUG static bool isX87InsWithNoOperands(instruction ins) { #if FEATURE_STACK_FP_X87 return ( ins == INS_f2xm1 || ins == INS_fchs || ins == INS_fld1 || ins == INS_fldl2e || ins == INS_fldz || ins == INS_fprem || ins == INS_frndint || ins == INS_fscale ); #else // !FEATURE_STACK_FP_X87 return false; #endif // !FEATURE_STACK_FP_X87 } #endif // DEBUG void emitter::emitIns(instruction ins) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(); size_t code = insCodeMR(ins); #ifdef DEBUG #if FEATURE_STACK_FP_X87 if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos) #endif // FEATURE_STACK_FP_X87 { // We cannot have #ifdef inside macro expansion. bool assertCond = (ins == INS_cdq || isX87InsWithNoOperands(ins) || ins == INS_int3 || ins == INS_lock || ins == INS_leave || ins == INS_movsb || ins == INS_movsd || ins == INS_movsp || ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || ins == INS_r_movsp || ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret || ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp #ifdef _TARGET_AMD64_ || ins == INS_vzeroupper #endif ); assert(assertCond); } #endif //DEBUG #ifdef _TARGET_AMD64_ assert((code & REX_PREFIX_MASK) == 0); // Can't have a REX bit with no operands, right? #endif // _TARGET_AMD64_ if (code & 0xFF000000) sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case. else if (code & 0x00FF0000) sz = 3; else if (code & 0x0000FF00) sz = 2; else sz = 1; #ifdef _TARGET_AMD64_ // Account for 2-byte VEX prefix in case of vzeroupper if (ins == INS_vzeroupper) { sz += 2; } #endif insFormat fmt = IF_NONE; #if FEATURE_STACK_FP_X87 if (CodeGen::instIsFP(ins)) { fmt = emitInsModeFormat(ins, IF_TRD); } #endif // FEATURE_STACK_FP_X87 id->idIns(ins); id->idInsFmt(fmt); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #if !defined(LEGACY_BACKEND) // Add an instruction with no operands, but whose encoding depends on the size // (Only CDQ/CQO currently) void emitter::emitIns(instruction ins, emitAttr attr) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); size_t code = insCodeMR(ins); assert(ins == INS_cdq); assert((code & 0xFFFFFF00) == 0); sz = 1; insFormat fmt = IF_NONE; sz += emitGetVexPrefixAdjustedSize(ins, attr, code); if (TakesRexWPrefix(ins, attr)) sz += emitGetRexPrefixSize(ins); id->idIns(ins); id->idInsFmt(fmt); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } // fill in all the fields void emitter::emitHandleMemOp(GenTree* mem, instrDesc* id, bool isSrc) { GenTreeIndir* indir = mem->AsIndir(); GenTree* memBase = indir->Base(); if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR)) { CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd; // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) { // Contract: // fgMorphField() changes any statics that won't fit into 32-bit addresses into // constants with an indir, rather than GT_CLS_VAR, based on the reloc type hint given // by the VM. Hence the emitter should always mark GT_CLS_VAR_ADDR as relocatable. // // Data section constants: these get allocated close to the code block of the method and // are always addressable IP-relative. These too should be marked as relocatable.
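// For illustration: on AMD64 a static field access such as "mov eax, [classVar]"
// is emitted RIP-relative, so the 32-bit displacement recorded here has to be
// relocated once the final code address is known.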
id->idSetIsDspReloc(); } id->idAddr()->iiaFieldHnd = fldHnd; if (isSrc) { id->idInsFmt(IF_RRD_MRD); } else { id->idInsFmt(IF_MRD_RRD); } } else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) { // Absolute addresses marked as contained should fit within the base of addr mode. assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); // Either not generating relocatable code or addr must be an icon handle assert(!emitComp->opts.compReloc || memBase->IsIconHandle()); if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp)) { id->idSetIsDspReloc(); } if (isSrc) { id->idInsFmt(IF_RRD_ARD); } else { id->idInsFmt(IF_ARD_RRD); } id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue()); } else { if (memBase != nullptr) { id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum; } else { id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; } if (indir->HasIndex()) { id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum; } else { id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; } id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale()); // already set disp in ctor assert(emitGetInsAmdAny(id) == ssize_t(indir->Offset())); // make sure "disp" is stored properly } } // Takes care of storing all incoming register parameters // into their corresponding shadow slots (defined by the x64 ABI) void emitter::spillIntArgRegsToShadowSlots() { unsigned argNum; instrDesc* id; UNATIVE_OFFSET sz; assert(emitComp->compGeneratingProlog); for (argNum = 0; argNum < MAX_REG_ARG; ++argNum) { regNumber argReg = intArgRegs[argNum]; // The offsets for the shadow space start at RSP + 8 // (just above the caller's return address) int offset = (argNum + 1) * EA_PTRSIZE; id = emitNewInstrAmd(EA_PTRSIZE, offset); id->idIns(INS_mov); id->idInsFmt(IF_AWR_RRD); id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1); // The offset has already been set in the instrDesc ctor, // make sure we got it right.
assert(emitGetInsAmdAny(id) == ssize_t(offset)); id->idReg1(argReg); sz = emitInsSizeAM(id, insCodeMR(INS_mov)); id->idCodeSize(sz); emitCurIGsize += sz; } } // This is very similar to emitInsBinary and probably could be folded into the same function, // except that the requirements on the incoming parameter are different; // e.g. the memory op in the storeind case must NOT be contained void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node) { UNATIVE_OFFSET sz; instrDesc* id; switch (node->OperGet()) { case GT_IND: { GenTreeIndir* mem = node->AsIndir(); if (mem->HasBase() && mem->Base()->OperGet() == GT_CLS_VAR_ADDR) { emitIns_R_C(ins, attr, node->gtRegNum, mem->Base()->gtClsVar.gtClsVarHnd, 0); return; } else if (mem->Addr()->OperGet() == GT_LCL_VAR_ADDR) { GenTreeLclVarCommon* varNode = mem->Addr()->AsLclVarCommon(); emitIns_R_S(ins, attr, node->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); return; } else { GenTreePtr addr = mem->Addr(); assert (addr->OperIsAddrMode() || addr->gtOper == GT_CLS_VAR_ADDR || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained()); size_t offset = mem->Offset(); id = emitNewInstrAmd(attr, offset); id->idIns(ins); id->idReg1(node->gtRegNum); id->idInsFmt(IF_RWR_ARD); emitHandleMemOp(node, id, true); // may overwrite format if (addr->IsCnsIntOrI() && addr->isContained()) { // Absolute addresses marked as contained should fit within the base of addr mode. assert(addr->AsIntConCommon()->FitsInAddrBase(emitComp)); // Case of "ins reg, [disp]" and should use IF_RWR_ARD as format id->idInsFmt(IF_RWR_ARD); } sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); } } break; case GT_STOREIND: { GenTreeIndir* mem = node->AsIndir(); GenTree* memBase = mem->Base(); size_t offset = mem->Offset(); GenTree* data = node->gtOp.gtOp2; if ((memBase != nullptr) && (memBase->OperGet() == GT_CLS_VAR_ADDR)) { if (data->isContained()) { emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int) data->AsIntConCommon()->IconValue()); } else { emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, data->gtRegNum, 0); } return; } if (mem->Addr()->OperGet() == GT_LCL_VAR_ADDR) { GenTreeLclVarCommon* varNode = memBase->AsLclVarCommon(); if (data->isContainedIntOrIImmed()) { emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int) data->AsIntConCommon()->IconValue()); codeGen->genUpdateLife(varNode); } else { assert(!data->isContained()); emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); } return; } if (data->isContainedIntOrIImmed()) { int icon = (int) data->AsIntConCommon()->IconValue(); id = emitNewInstrAmdCns(attr, offset, icon); id->idIns(ins); id->idInsFmt(IF_AWR_CNS); emitHandleMemOp(node, id, false); // may overwrite format if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained()) { // Absolute addresses marked as contained should fit within the base of addr mode.
assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp)); // Case of "ins [disp], immed " and should use IF_AWR_CNS as format id->idInsFmt(IF_AWR_CNS); } sz = emitInsSizeAM(id, insCodeMI(ins), icon); id->idCodeSize(sz); } else { id = emitNewInstrAmd(attr, offset); id->idIns(ins); id->idInsFmt(IF_AWR_RRD); emitHandleMemOp(node, id, false); // may overwrite format id->idReg1(node->gtOp.gtOp2->gtRegNum); sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); } } break; case GT_STORE_LCL_VAR: { GenTreeLclVarCommon* varNode = node->AsLclVarCommon(); GenTree* data = node->gtOp.gtOp1->gtEffectiveVal(); codeGen->inst_set_SV_var(varNode); assert(varNode->gtRegNum == REG_NA); // stack store if (data->isContainedIntOrIImmed()) { emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int) data->AsIntConCommon()->IconValue()); codeGen->genUpdateLife(varNode); } else { assert(!data->isContained()); emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0); codeGen->genUpdateLife(varNode); } } return; default: unreached(); } dispIns(id); emitCurIGsize += sz; } CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*= EA_8BYTE*/) { NYI("emitLiteralConst"); return nullptr; } // Generates a float or double data section constant and returns field handle representing // the data offset to access the constant. This is called by emitInsBinary() in case // of contained float of double constants. CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr /*=EA_UNKNOWN*/) { if (attr == EA_UNKNOWN) { attr = emitTypeSize(tree->TypeGet()); } else { assert(emitTypeSize(tree->TypeGet()) == attr); } double constValue = tree->gtDblCon.gtDconVal; void *cnsAddr; float f; bool dblAlign; if (attr == EA_4BYTE) { f = forceCastToFloat(constValue); cnsAddr = &f; dblAlign = false; } else { cnsAddr = &constValue; dblAlign = true; } // Access to inline data is 'abstracted' by a special type of static member // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference // to constant data, not a real static field. UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8; UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign); return emitComp->eeFindJitDataOffs(cnum); } // The callee must call genConsumeReg() for all sources, including address registers // of both source and destination, and genProduceReg() for the destination register, if any. regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) { // dst can only be a reg or modrm assert(!dst->isContained() || dst->isContainedIndir() || dst->isContainedLclField() || instrIs3opImul(ins)); // dst on these isn't really the dst // src can be anything but both src and dst cannot be addr modes // or at least cannot be contained addr modes if (dst->isContainedIndir()) assert(!src->isContainedIndir()); if (src->isContainedLclField()) assert(!dst->isContained()); // find which operand is a memory op (if any) // and what its base is GenTreeIndir* mem = nullptr; GenTree* memBase = nullptr; if (dst->isContainedIndir()) { mem = dst->AsIndir(); } else if (src->isContainedIndir()) { mem = src->AsIndir(); } if (mem) { memBase = mem->gtOp1; } // Find immed (if any) - it cannot be the dst // SSE2 instructions allow only the second operand to be a memory operand. 
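// For example, "addsd xmm0, qword ptr [rax]" is encodable, but there is no
// SSE2 form with the memory operand as the destination ("addsd [rax], xmm0").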
GenTreeIntConCommon* intConst = nullptr; GenTreeDblCon* dblConst = nullptr; if (src->isContainedIntOrIImmed()) { intConst = src->AsIntConCommon(); } else if (src->isContainedFltOrDblImmed()) { dblConst = src->AsDblCon(); } // find local field if any GenTreeLclFld* lclField = nullptr; if (src->isContainedLclField()) { lclField = src->AsLclFld(); } else if (dst->isLclField() && dst->gtRegNum == REG_NA) { lclField = dst->AsLclFld(); } // First handle the simple non-memory cases // if ((mem == nullptr) && (lclField == nullptr)) { if (intConst != nullptr) { // reg, immed assert(!dst->isContained()); emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue()); // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or rsTrackRegIntCns(dst->gtRegNum, intConst->IconValue()) (as appropriate)? } else if (dblConst != nullptr) { // Emit a data section constant for float or double constant. CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblConst); emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0); } else { // reg, reg assert(!src->isContained() && !dst->isContained()); if (instrHasImplicitRegPairDest(ins)) { emitIns_R(ins, attr, src->gtRegNum); } else { emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum); } // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or, for ins=MOV: regTracker.rsTrackRegCopy(dst->gtRegNum, src->gtRegNum); ? } return dst->gtRegNum; } // Next handle the cases where we have a stack based local memory operand // if (lclField) { unsigned offset = lclField->gtLclFld.gtLclOffs; unsigned varNum = lclField->gtLclVarCommon.gtLclNum; // Is the memory op in the source position? if (src->isContainedLclField()) { if (instrHasImplicitRegPairDest(ins)) { // src is a stack based local variable // dst is implicit - RDX:RAX emitIns_S(ins, attr, varNum, offset); } else { // src is a stack based local variable // dst is a register emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset); } } else // The memory op is in the dest position. { assert(dst->gtRegNum == REG_NA); // src could be int or reg if (src->isContainedIntOrIImmed()) { // src is a contained immediate // dst is a stack based local variable emitIns_S_I(ins, attr, varNum, offset, (int) src->gtIntConCommon.IconValue()); } else { // src is a register // dst is a stack based local variable assert(!src->isContained()); emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset); } } return dst->gtRegNum; } // Now we are left with only the cases where the instruction has some kind of a memory operand // assert(mem != nullptr); // Next handle the class static variable cases // if (memBase->OperGet() == GT_CLS_VAR_ADDR) { // Is the memory op in the source position? if (mem == src) { if (instrHasImplicitRegPairDest(ins)) { // src is a class static variable // dst is implicit - RDX:RAX emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0); } else { // src is a class static variable // dst is a register emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0); codeGen->genProduceReg(dst); } } else // The memory op is in the dest position.
{ if (src->isContained()) { // src is a contained immediate // dst is a class static variable emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int) src->gtIntConCommon.IconValue()); } else { // src is a register // dst is a class static variable emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0); } } return dst->gtRegNum; } // Finally we handle addressing modes case [regBase + regIndex*scale + const] // // We will have to construct and fill in the instruction descriptor for this case // instrDesc* id = nullptr; // Is the src an immediate constant? if (intConst) { // [mem], imm id = emitNewInstrAmdCns(attr, mem->Offset(), (int) intConst->IconValue()); } else // [mem], reg OR reg, [mem] { size_t offset = mem->Offset(); id = emitNewInstrAmd(attr, offset); id->idIns(ins); GenTree* regTree = (src == mem) ? dst : src; // there must be one non-contained src assert(!regTree->isContained()); id->idReg1(regTree->gtRegNum); } assert(id != nullptr); id->idIns(ins); // Set the instruction. emitHandleMemOp(mem, id, mem == src); // Determine the instruction format // insFormat fmt = IF_NONE; if (mem == dst) { if (!src->isContained()) { fmt = emitInsModeFormat(ins, IF_ARD_RRD); } else { fmt = emitInsModeFormat(ins, IF_ARD_CNS); } } else { assert(!dst->isContained()); if (instrHasImplicitRegPairDest(ins)) { fmt = emitInsModeFormat(ins, IF_ARD); } else { fmt = emitInsModeFormat(ins, IF_RRD_ARD); } } assert(fmt != IF_NONE); id->idInsFmt(fmt); // Determine the instruction size // UNATIVE_OFFSET sz = 0; if (intConst) { sz = emitInsSizeAM(id, insCodeMI(ins), (int) intConst->IconValue()); } else { if (mem == dst) { sz = emitInsSizeAM(id, insCodeMR(ins)); } else // mem == src { if (instrHasImplicitRegPairDest(ins)) { sz = emitInsSizeAM(id, insCode(ins)); } else { sz = emitInsSizeAM(id, insCodeRM(ins)); } } } assert(sz != 0); regNumber result = REG_NA; if (src == mem) { result = dst->gtRegNum; } id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; return result; } /** Emit logic for Read-Modify-Write Instructions * Responsible for encoding a single instruction that will perform an operation of the form * *addr op= operand, for example: * ADD [RAX], RCX * * Preconditions: Lowering has taken care of recognizing the StoreInd pattern of * StoreInd( AddressTree, BinOp( Ind( AddressTree ), Operand ) ) * The address to store is already sitting in a register. * * This is a no-produce operation, meaning that no register output will * be produced for future use in the code stream. * * The caller is responsible for calling genConsumeReg() on all source registers, and * genProduceReg() on the target register, if any.
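 *
 * For illustration, a lowered tree of the shape
 *    StoreInd(addr, Add(Ind(addr), 5))
 * is emitted here as the single instruction "add dword ptr [rax], 5"
 * (assuming the address happens to live in RAX), rather than as a separate
 * load, add, and store.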
*/ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr, GenTree* src) { assert(dstAddr->isIndir()); GenTreeIndir* indir = dstAddr->AsIndir(); GenTreePtr mem = indir->Addr(); assert(mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LEA || mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); instrDesc* id = nullptr; UNATIVE_OFFSET sz; size_t offset = 0; if (mem->gtSkipReloadOrCopy()->OperGet() != GT_CLS_VAR_ADDR) { offset = indir->Offset(); } // find immed (if any) - it cannot be a dst GenTreeIntConCommon* intConst = nullptr; if (src->isContainedIntOrIImmed()) { intConst = src->AsIntConCommon(); } if (intConst != nullptr) { id = emitNewInstrAmdCns(attr, offset, (int) intConst->IconValue()); } else { // ind, reg id = emitNewInstrAmd(attr, offset); // there must be one non-contained src assert(!src->isContained()); id->idReg1(src->gtRegNum); // fmt is set below } id->idIns(ins); emitHandleMemOp(dstAddr, id, false); if (src->isContainedIntOrIImmed()) { if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) { id->idInsFmt(IF_MRW_CNS); } else { id->idInsFmt(IF_ARW_CNS); } sz = emitInsSizeAM(id, insCodeMI(ins), (int) intConst->IconValue()); } else { if (mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR) { id->idInsFmt(IF_MRW_RRD); } else { id->idInsFmt(IF_ARW_RRD); } sz = emitInsSizeAM(id, insCodeMR(ins)); } id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /** Emit logic for Read-Modify-Write Instructions on unary operators * Responsible for encoding a single instruction that will perform an operation of the form * *addr = UnaryOp( *addr ), for example: * NOT [RAX] * * Preconditions: Lowering has taken care of recognizing the StoreInd pattern of * StoreInd( AddressTree, UnOp( Ind( AddressTree ) ) ) * The address to store is already sitting in a register. * * This is a no-produce operation, meaning that no register output will * be produced for future use in the code stream. */ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTree* dstAddr) { assert(ins == INS_not || ins == INS_neg); assert(dstAddr->isIndir()); GenTreeIndir* indir = dstAddr->AsIndir(); GenTreePtr mem = indir->Addr(); assert(mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LCL_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_CLS_VAR_ADDR || mem->gtSkipReloadOrCopy()->OperGet() == GT_LEA || mem->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT); size_t offset = 0; if (mem->gtSkipReloadOrCopy()->OperGet() != GT_CLS_VAR_ADDR) { offset = indir->Offset(); } instrDesc* id = emitNewInstrAmd(attr, offset); emitHandleMemOp(dstAddr, id, true); id->idIns(ins); if (mem->OperGet() == GT_CLS_VAR_ADDR) { id->idInsFmt(IF_MRW); } else { id->idInsFmt(IF_ARW); } UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #endif // !LEGACY_BACKEND #if FEATURE_STACK_FP_X87 /***************************************************************************** * * Add an instruction of the form "op ST(0),ST(n)".
*/ void emitter::emitIns_F0_F(instruction ins, unsigned fpreg) { UNATIVE_OFFSET sz = 2; instrDesc* id = emitNewInstr(); insFormat fmt = emitInsModeFormat(ins, IF_TRD_FRD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1((regNumber)fpreg); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction of the form "op ST(n),ST(0)". */ void emitter::emitIns_F_F0(instruction ins, unsigned fpreg) { UNATIVE_OFFSET sz = 2; instrDesc* id = emitNewInstr(); insFormat fmt = emitInsModeFormat(ins, IF_FRD_TRD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1((regNumber)fpreg); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #endif // FEATURE_STACK_FP_X87 /***************************************************************************** * * Add an instruction referencing a single register. */ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) { emitAttr size = EA_SIZE(attr); assert(size <= EA_PTRSIZE); noway_assert(emitVerifyEncodable(ins, size, reg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrTiny(attr); switch (ins) { case INS_inc: case INS_dec: #ifdef _TARGET_AMD64_ sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix) #else // !_TARGET_AMD64_ if (size == EA_1BYTE) sz = 2; // Use the long form as the small one has no 'w' bit else sz = 1; // Use short form #endif // !_TARGET_AMD64_ break; case INS_pop: case INS_pop_hide: case INS_push: case INS_push_hide: /* We don't currently push/pop small values */ assert(size == EA_PTRSIZE); sz = 1; break; default: /* All the sixteen INS_setCCs are contiguous. */ if (INS_seto <= ins && ins <= INS_setg) { // Rough check that we used the endpoints for the range check assert(INS_seto + 0xF == INS_setg); // The caller must specify EA_1BYTE for 'attr' assert(attr == EA_1BYTE); /* We expect this to always be a 'big' opcode */ assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000); size = attr; sz = 3; break; } else { sz = 2; break; } } insFormat fmt = emitInsModeFormat(ins, IF_RRD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(reg); // 16-bit operand instructions will need a prefix. // This refers to 66h size prefix override. if (size == EA_2BYTE) { sz += 1; } // Vex bytes sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); // REX byte if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) sz += emitGetRexPrefixSize(ins); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add an instruction referencing a register and a constant. 
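 *
 * A few illustrative encodings that the size switch below accounts for:
 *    add eax, 4           ; imm8 form, 3 bytes (83 C0 04)
 *    add eax, 0x12345678  ; EAX short form + imm32, 5 bytes (05 78 56 34 12)
 *    shl eax, 3           ; IF_RRW_SHF, 3 bytes (C1 E0 03)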
*/ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val) { emitAttr size = EA_SIZE(attr); // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins)); noway_assert(emitVerifyEncodable(ins, size, reg)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr))); #endif UNATIVE_OFFSET sz; instrDesc* id; insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); // Figure out the size of the instruction switch (ins) { case INS_mov: #ifdef _TARGET_AMD64_ // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0 // and this isn't a reloc constant. if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr)) { attr = size = EA_4BYTE; } if (size > EA_4BYTE) { sz = 9; // Really it is 10, but we'll add one more later break; } #endif // _TARGET_AMD64_ sz = 5; break; case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_RRW_SHF; sz = 3; val &= 0x7F; valInByte = true; // shift amount always placed in a byte break; default: if (EA_IS_CNS_RELOC(attr)) valInByte = false; // relocs can't be placed in a byte if (valInByte) { if (IsSSEOrAVXInstruction(ins)) sz = 5; else sz = 3; } else { if (reg == REG_EAX && !instrIs3opImul(ins)) { sz = 1; } else { sz = 2; } #ifdef _TARGET_AMD64_ if (size > EA_4BYTE) { // We special-case anything that takes a full 8-byte constant. sz += 4; } else #endif // _TARGET_AMD64_ { sz += EA_SIZE_IN_BYTES(attr); } } break; } // Vex prefix size sz += emitGetVexPrefixSize(ins, attr); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a 64-bit sized // operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target register. So we also // need to check if that built-in register is an extended register. if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) { sz += emitGetRexPrefixSize(ins); } #ifdef _TARGET_X86_ assert(reg < 8); #endif id = emitNewInstrSC(attr, val); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(reg); // 16-bit operand instructions will need a prefix if (size == EA_2BYTE) sz += 1; id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (reg == REG_ESP) { if (emitCntStackDepth) { if (ins == INS_sub) { S_UINT32 newStackLvl(emitCurStackLvl); newStackLvl += S_UINT32(val); noway_assert(!newStackLvl.IsOverflow()); emitCurStackLvl = newStackLvl.Value(); if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_add) { S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val); noway_assert(!newStackLvl.IsOverflow()); emitCurStackLvl = newStackLvl.Value(); } } } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add an instruction referencing an integer constant. 
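 *
 * For example, "push 10" fits the imm8 form (6A 0A, 2 bytes) while
 * "push 1000" needs the imm32 form (68 E8 03 00 00, 5 bytes) - exactly the
 * "valInByte ? 2 : 5" choice made below.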
*/ void emitter::emitIns_I(instruction ins, emitAttr attr, int val ) { UNATIVE_OFFSET sz; instrDesc* id; bool valInByte = ((signed char)val == val); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif if (EA_IS_CNS_RELOC(attr)) valInByte = false; // relocs can't be placed in a byte switch (ins) { case INS_loop: case INS_jge: sz = 2; break; case INS_ret: sz = 3; break; case INS_push_hide: case INS_push: sz = valInByte ? 2 : 5; break; default: NO_WAY("unexpected instruction"); } id = emitNewInstrSC(attr, val); id->idIns(ins); id->idInsFmt(IF_CNS); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add a "jump through a table" instruction. */ void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base) { assert(EA_SIZE(attr) == EA_4BYTE); UNATIVE_OFFSET sz = 3 + 4; const instruction ins = INS_i_jmp; if (IsExtendedReg(reg, attr)) sz += emitGetRexPrefixSize(ins); instrDesc* id = emitNewInstrAmd(attr, base); id->idIns(ins); id->idInsFmt(IF_ARD); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP; #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = base; #endif id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with a static data member operand. If 'size' is 0, the * instruction operates on the address of the static member instead of its * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). */ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) { #if RELOC_SUPPORT // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif UNATIVE_OFFSET sz; instrDesc* id; /* Are we pushing the offset of the class variable? */ if (EA_IS_OFFSET(attr)) { assert(ins == INS_push); sz = 1 + sizeof(void*); id = emitNewInstrDsp(EA_1BYTE, offs); id->idIns(ins); id->idInsFmt(IF_MRD_OFF); } else { #if FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_MRD, IF_TRD_MRD, IF_MWR_TRD); #else // !FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_MRD); #endif // !FEATURE_STACK_FP_X87 id = emitNewInstrDsp(attr, offs); id->idIns(ins); id->idInsFmt(fmt); sz = emitInsSizeCV(id, insCodeMR(ins)); } // Vex prefix size sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); if (TakesRexWPrefix(ins, attr)) { // REX.W prefix sz += emitGetRexPrefixSize(ins); } id->idAddr()->iiaFieldHnd = fldHnd; id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * Add an instruction with two register operands. 
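 *
 * For example, emitIns_R_R(INS_xor, EA_4BYTE, REG_EAX, REG_EAX) emits the
 * classic 2-byte "xor eax, eax" register-zeroing idiom; 64-bit or extended
 * register operands grow the size via the REX accounting in emitInsSizeRR.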
*/ void emitter::emitIns_R_R (instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) { /* We don't want to generate any useless mov instructions! */ assert(ins != INS_mov || reg1 != reg2); emitAttr size = EA_SIZE(attr); assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg1, reg2)); UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); /* Special case: "XCHG" uses a different format */ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD); instrDesc* id = emitNewInstrTiny(attr); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(reg1); id->idReg2(reg2); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with two register operands and an integer constant. */ void emitter::emitIns_R_R_I (instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) { // The SSE2 version requires 5 bytes and the AVX version 6 bytes UNATIVE_OFFSET sz = 4; if (IsSSEOrAVXInstruction(ins)) { sz = UseAVX() ? 6 : 5; } #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif instrDesc* id = emitNewInstrSC(attr, ival); // REX prefix if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(IF_RRW_RRW_CNS); id->idReg1(reg1); id->idReg2(reg2); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #ifdef FEATURE_AVX_SUPPORT /***************************************************************************** * * Add an instruction with three register operands. */ void emitter::emitIns_R_R_R (instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2) { assert(IsSSEOrAVXInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); // Currently the VEX prefix is only emitted in its 3-byte form. // size = VEX + opcode + ModR/M = 3 + 1 + 1 = 5 // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future UNATIVE_OFFSET sz = 5; instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idInsFmt(IF_RWR_RRD_RRD); id->idReg1(targetReg); id->idReg2(reg1); id->idReg3(reg2); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } #endif /***************************************************************************** * * Add an instruction with a register + static member operands. */ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs) { #if RELOC_SUPPORT // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif emitAttr size = EA_SIZE(attr); assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg)); UNATIVE_OFFSET sz; instrDesc* id; // Are we MOV'ing the offset of the class variable into EAX? if (EA_IS_OFFSET(attr)) { id = emitNewInstrDsp(EA_1BYTE, offs); id->idIns(ins); id->idInsFmt(IF_RWR_MRD_OFF); assert(ins == INS_mov && reg == REG_EAX); // Special case: "mov eax, [addr]" is smaller sz = 1 + sizeof(void*); } else { insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD); id = emitNewInstrDsp(attr, offs); id->idIns(ins); id->idInsFmt(fmt); #ifdef _TARGET_X86_ // Special case: "mov eax, [addr]" is smaller.
// This case is not enabled for amd64 as it always uses RIP-relative addressing // and it results in a smaller instruction size than encoding the 64-bit addr in the // instruction. if (ins == INS_mov && reg == REG_EAX) { sz = 1 + sizeof(void*); if (size == EA_2BYTE) sz += 1; } else #endif //_TARGET_X86_ { sz = emitInsSizeCV(id, insCodeRM(ins)); } // Special case: mov reg, fs:[ddd] if (fldHnd == FLD_GLOBAL_FS) sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); // REX prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idReg1(reg); id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with a static member + register operands. */ void emitter::emitIns_C_R (instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) { #if RELOC_SUPPORT // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif emitAttr size = EA_SIZE(attr); #if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87 // For x86 RyuJIT it is valid to storeind a double sized operand in an xmm reg to memory assert(size <= EA_8BYTE); #else assert(size <= EA_PTRSIZE); #endif noway_assert(emitVerifyEncodable(ins, size, reg)); instrDesc* id = emitNewInstrDsp(attr, offs); insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD); id->idIns(ins); id->idInsFmt(fmt); UNATIVE_OFFSET sz; #ifdef _TARGET_X86_ // Special case: "mov [addr], EAX" is smaller. // This case is not enabled for amd64 as it always uses RIP-relative addressing // and it results in a smaller instruction size than encoding the 64-bit addr in // the instruction. if (ins == INS_mov && reg == REG_EAX) { sz = 1 + sizeof(void*); if (size == EA_2BYTE) sz += 1; } else #endif //_TARGET_X86_ { sz = emitInsSizeCV(id, insCodeMR(ins)); } // Special case: mov fs:[ddd], reg if (fldHnd == FLD_GLOBAL_FS) { sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); // REX prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idReg1(reg); id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add an instruction with a static member + constant.
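 *
 * For illustration, a constant shift of a static such as
 *    shl dword ptr [clsVar], 3
 * arrives here as INS_shl_N with val == 3 and is given the IF_MRW_SHF
 * format by the switch below.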
*/ void emitter::emitIns_C_I (instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val) { #if RELOC_SUPPORT // Statics always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_MRW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_MRD_CNS); break; } instrDesc* id = emitNewInstrCnsDsp(attr, val, offs); id->idIns(ins); id->idInsFmt(fmt); size_t code = insCodeMI(ins); UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val); #ifdef _TARGET_AMD64_ // Vex prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); // REX prefix, if not already included in "code" if (TakesRexWPrefix(ins, attr) && (code & REX_PREFIX_MASK) == 0) { sz += emitGetRexPrefixSize(ins); } #endif // _TARGET_AMD64_ id->idAddr()->iiaFieldHnd = fldHnd; id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_J_S (instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs) { assert(ins == INS_mov); assert(dst->bbFlags & BBF_JMP_TARGET); instrDescLbl* id = emitNewInstrLbl(); id->idIns(ins); id->idInsFmt(IF_SWR_LABEL); id->idAddr()->iiaBBlabel = dst; /* The label reference is always long */ id->idjShort = 0; id->idjKeepLong = 1; /* Record the current IG and offset within it */ id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; /* Append this instruction to this IG's jump list */ id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs); id->dstLclVar.initLclVarAddr(varx, offs); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif #if EMITTER_STATS emitTotalIGjmps++; #endif #if RELOC_SUPPORT // Storing the address of a basicBlock will need a reloc // as the instruction uses the absolute address, // not a relative address. // // On Amd64, absolute code addresses should always go through a reloc // to be encoded as a RIP rel32 offset. #ifndef _TARGET_AMD64_ if (emitComp->opts.compReloc) #endif { id->idSetIsDspReloc(); } #endif //RELOC_SUPPORT id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Add a label instruction. */ void emitter::emitIns_R_L (instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) { assert(ins == INS_lea); assert(dst->bbFlags & BBF_JMP_TARGET); instrDescJmp* id = emitNewInstrJmp(); id->idIns(ins); id->idReg1(reg); id->idInsFmt(IF_RWR_LABEL); id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE id->idAddr()->iiaBBlabel = dst; /* The label reference is always long */ id->idjShort = 0; id->idjKeepLong = 1; /* Record the current IG and offset within it */ id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; /* Append this instruction to this IG's jump list */ id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #ifdef DEBUG // Mark the catch return if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET) { id->idDebugOnlyInfo()->idCatchRet = true; } #endif // DEBUG #if EMITTER_STATS emitTotalIGjmps++; #endif UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); // Set the relocation flags - these give a hint to the zapper to perform // relocation of the specified 32-bit address.
id->idSetRelocFlags(attr); dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * The following add instructions referencing address modes. */ void emitter::emitIns_I_AR (instruction ins, emitAttr attr, int val, regNumber reg, int disp, int memCookie, void* clsCookie) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: #ifdef _TARGET_X86_ assert(val != 1); #endif fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } /* Useful if you want to trap moves with 0 constant if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE) { printf("MOV 0\n"); } */ UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); assert((memCookie == NULL) == (clsCookie == NULL)); #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = memCookie; id->idDebugOnlyInfo()->idClsCookie = clsCookie; #endif id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_I_AI (instruction ins, emitAttr attr, int val, ssize_t disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } /* Useful if you want to trap moves with 0 constant if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE) { printf("MOV 0\n"); } */ UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_AR (instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); if (ins == INS_lea) { if (ireg == base && disp == 0) { // Maybe the emitter is not the common place for this optimization, but it's a better choke point // than all the emitIns(ins, tree) call sites, which we would otherwise have to analyze individually // return; } } UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); assert((memCookie == NULL) == (clsCookie == NULL)); #ifdef DEBUG
id->idDebugOnlyInfo()->idMemCookie = memCookie; id->idDebugOnlyInfo()->idClsCookie = clsCookie; #endif id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_AI (instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_AR_R (instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // !FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // !FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); assert((memCookie == NULL) == (clsCookie == NULL)); #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = memCookie; id->idDebugOnlyInfo()->idClsCookie = clsCookie; #endif id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_AI_R (instruction ins, emitAttr attr, regNumber ireg, ssize_t disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= 
emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_I_ARR (instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = rg2; id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_ARR_R (instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_I_ARX (instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 
4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = reg; id->idAddr()->iiaAddrMode.amIndxReg = rg2; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_ARX (instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_ARX_R (instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // !FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // !FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_I_AX (instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_ARW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_ARD_CNS); break; } UNATIVE_OFFSET sz; instrDesc* id = 
emitNewInstrAmdCns(attr, disp, val); id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_AX (instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp) { assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA)); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_AX_R (instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp) { UNATIVE_OFFSET sz; instrDesc* id = emitNewInstrAmd(attr, disp); insFormat fmt; if (ireg == REG_NA) { #if FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD); #else // !FEATURE_STACK_FP_X87 fmt = emitInsModeFormat(ins, IF_ARD); #endif // !FEATURE_STACK_FP_X87 } else { fmt = emitInsModeFormat(ins, IF_ARD_RRD); noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg)); assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE)); id->idReg1(ireg); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; id->idAddr()->iiaAddrMode.amIndxReg = reg; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul); assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** * * The following add instructions referencing stack-based local variables. 
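 *
 * For example (illustrative), spilling a register to a local's home slot is
 * requested as emitIns_S_R(INS_mov, EA_PTRSIZE, reg, varNum, 0); the actual
 * frame offset of varNum is resolved later, once the stack layout is final.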
*/ void emitter::emitIns_S (instruction ins, emitAttr attr, int varx, int offs) { instrDesc* id = emitNewInstr(attr); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs); #if FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_SRD, IF_TRD_SRD, IF_SWR_TRD); #else // !FEATURE_STACK_FP_X87 insFormat fmt = emitInsModeFormat(ins, IF_SRD); #endif // !FEATURE_STACK_FP_X87 // 16-bit operand instructions will need a prefix if (EA_SIZE(attr) == EA_2BYTE) { sz += 1; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS if (ins == INS_push) { emitCurStackLvl += emitCntStackDepth; if (emitMaxStackDepth < emitCurStackLvl) emitMaxStackDepth = emitCurStackLvl; } else if (ins == INS_pop) { emitCurStackLvl -= emitCntStackDepth; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } void emitter::emitIns_S_R (instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) { instrDesc* id = emitNewInstr(attr); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD); // 16-bit operand instructions will need a prefix if (EA_SIZE(attr) == EA_2BYTE) { sz++; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_R_S (instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) { emitAttr size = EA_SIZE(attr); noway_assert(emitVerifyEncodable(ins, size, ireg)); instrDesc* id = emitNewInstr(attr); UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD); // Most 16-bit operand instructions need a prefix if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) { sz++; } // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) { sz += emitGetRexPrefixSize(ins); } id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; } void emitter::emitIns_S_I (instruction ins, emitAttr attr, int varx, int offs, int val) { #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr)); #endif insFormat fmt; switch (ins) { case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: assert(val != 1); fmt = IF_SRW_SHF; val &= 0x7F; break; default: fmt = emitInsModeFormat(ins, IF_SRD_CNS); break; } instrDesc* id = emitNewInstrCns(attr, val); 
id->idIns(ins); id->idInsFmt(fmt); UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val); // VEX prefix sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); // 64-bit operand instructions will need a REX.W prefix if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); } id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif dispIns(id); emitCurIGsize += sz; } /***************************************************************************** * * Record that a jump instruction uses the short encoding * */ void emitter::emitSetShortJump(instrDescJmp* id) { if (id->idjKeepLong) return; id->idjShort = true; } /***************************************************************************** * * Add a jmp instruction. */ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */) { UNATIVE_OFFSET sz; instrDescJmp* id = emitNewInstrJmp(); assert(dst->bbFlags & BBF_JMP_TARGET); id->idIns(ins); id->idInsFmt(IF_LABEL); id->idAddr()->iiaBBlabel = dst; #ifdef DEBUG // Mark the finally call if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY) { id->idDebugOnlyInfo()->idFinallyCall = true; } #endif // DEBUG /* Assume the jump will be long */ id->idjShort = 0; id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); /* Record the jump's IG and offset within it */ id->idjIG = emitCurIG; id->idjOffs = emitCurIGsize; /* Append this jump to this IG's jump list */ id->idjNext = emitCurIGjmpList; emitCurIGjmpList = id; #if EMITTER_STATS emitTotalIGjmps++; #endif /* Figure out the max. size of the jump/call instruction */ if (ins == INS_call) { sz = CALL_INST_SIZE; } else if (ins == INS_push || ins == INS_push_hide) { #if RELOC_SUPPORT // Pushing the address of a basicBlock will need a reloc // as the instruction uses the absolute address, // not a relative address if (emitComp->opts.compReloc) id->idSetIsDspReloc(); #endif sz = PUSH_INST_SIZE; } else { insGroup* tgt; /* This is a jump - assume the worst */ sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE; /* Can we guess at the jump distance? */ tgt = (insGroup*)emitCodeGetCookie(dst); if (tgt) { int extra; UNATIVE_OFFSET srcOffs; int jmpDist; assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL); /* This is a backward jump - figure out the distance */ srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL; /* Compute the distance estimate */ jmpDist = srcOffs - tgt->igOffs; assert((int)jmpDist > 0); /* How much beyond the max. short distance does the jump go? 
 */

        extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;

#if DEBUG_EMIT
        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
        {
            if (INTERESTING_JUMP_NUM == 0)
                printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
            printf("[0] Jump source is at %08X\n", srcOffs);
            printf("[0] Label block is at %08X\n", tgt->igOffs);
            printf("[0] Jump distance  - %04X\n", jmpDist);
            if (extra > 0)
                printf("[0] Distance excess = %d  \n", extra);
        }
#endif

        if (extra <= 0 && !id->idjKeepLong)
        {
            /* Wonderful - this jump surely will be short */

            emitSetShortJump(id);
            sz = JMP_SIZE_SMALL;
        }
    }
#if DEBUG_EMIT
    else
    {
        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
        {
            if (INTERESTING_JUMP_NUM == 0)
                printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
            printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
                   emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
            printf("[0] Label block is unknown\n");
        }
    }
#endif
    }

    id->idCodeSize(sz);

    dispIns(id);
    emitCurIGsize += sz;

#if !FEATURE_FIXED_OUT_ARGS
    if (ins == INS_push)
    {
        emitCurStackLvl += emitCntStackDepth;

        if (emitMaxStackDepth < emitCurStackLvl)
            emitMaxStackDepth = emitCurStackLvl;
    }
#endif // !FEATURE_FIXED_OUT_ARGS
}

/*****************************************************************************
 *
 *  Add a call instruction (direct or indirect).
 *      argSize<0 means that the caller will pop the arguments
 *
 *  The other arguments are interpreted depending on callType as shown:
 *  Unless otherwise specified, ireg, xreg, xmul, disp should have default values.
 *
 *  EC_FUNC_TOKEN       : addr is the method address
 *  EC_FUNC_TOKEN_INDIR : addr is the indirect method address
 *  EC_FUNC_ADDR        : addr is the absolute address of the function
 *  EC_FUNC_VIRTUAL     : "call [ireg+disp]"
 *
 *  If callType is one of these emitCallTypes, addr has to be NULL.
 *  EC_INDIR_R          : "call ireg".
 *  EC_INDIR_SR         : "call lcl" (eg. call [ebp-8]).
 *  EC_INDIR_C          : "call clsVar" (eg. call [clsVarAddr])
 *  EC_INDIR_ARD        : "call [ireg+xreg*xmul+disp]"
 *
 */

void emitter::emitIns_Call(EmitCallType          callType,
                           CORINFO_METHOD_HANDLE methHnd,
                           INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
                           void*            addr,
                           ssize_t          argSize,
                           emitAttr         retSize,
                           VARSET_VALARG_TP ptrVars,
                           regMaskTP        gcrefRegs,
                           regMaskTP        byrefRegs,
                           IL_OFFSETX       ilOffset /* = BAD_IL_OFFSET */,
                           regNumber        ireg     /* = REG_NA */,
                           regNumber        xreg     /* = REG_NA */,
                           unsigned         xmul     /* = 0 */,
                           ssize_t          disp     /* = 0 */,
                           bool             isJump   /* = false */,
                           bool             isNoGC   /* = false */)
{
    /* Sanity check the arguments depending on callType */

    assert(callType < EC_COUNT);
    assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
           (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
    assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
    assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == NULL);
    assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
    assert(callType != EC_INDIR_SR ||
           (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
    assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));

    // Our stack level should always be at least the number of argument bytes we push. Just
    // a sanity check.
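    // For example (illustrative): on x86, after pushing three 4-byte arguments the
    // current stack level is at least 12, and argSize will be 12 (or -12 when the
    // caller pops the arguments, per the comment above).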
    assert((unsigned) abs((signed)argSize) <= codeGen->genStackLevel);

#if STACK_PROBES
    if (emitComp->opts.compNeedStackProbes)
    {
        // If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe.
        // Else, just make sure the prolog does a probe for us. The invariant we're trying
        // to get is that at any point we go out to unmanaged code, there is at least
        // CORINFO_STACKPROBE_DEPTH bytes of stack available.
        //
        // The reason why we are not doing one probe for the max size at the prolog
        // is that we don't have the max depth precomputed (it can depend on codegen),
        // and we need it at the time we generate locallocs.
        //
        // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes into
        // account everything except for the arguments of a callee.
        //
        if ((sizeof(void*) +               // return address for call
             emitComp->genStackLevel +    // Current stack level. This gets reset on every
                                          // localloc and on the prolog (invariant is that
                                          // genStackLevel is 0 on basic block entry and exit and
                                          // after any alloca). genStackLevel will include any arguments
                                          // to the call, so we will insert an additional probe if
                                          // we've consumed more than JIT_RESERVED_STACK bytes
                                          // of stack, which is what the prolog probe covers (in
                                          // addition to the EE requested size)
             (emitComp->compHndBBtabCount * sizeof(void*)) // Hidden slots for calling finallys
             ) >= JIT_RESERVED_STACK)
        {
            // This happens when you have a call with a lot of arguments or a call is done
            // when there's a lot of stuff pushed on the stack (for example a call whose return
            // value is an argument of another call that has pushed stuff on the stack).
            // This shouldn't be very frequent.
            // For different values of JIT_RESERVED_STACK
            //
            // For mscorlib (109605 calls)
            //
            // 14190 probes in prologs (56760 bytes of code)
            //
            // JIT_RESERVED_STACK = 16 : 5452 extra probes
            // JIT_RESERVED_STACK = 32 : 1084 extra probes
            // JIT_RESERVED_STACK = 64 :    1 extra probe
            // JIT_RESERVED_STACK = 96 :    0 extra probes
            emitComp->genGenerateStackProbe();
        }
        else
        {
            if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
            {
                if (emitComp->compStackProbePrologDone)
                {
                    // We already generated a probe and this call is not happening
                    // at a depth >= JIT_RESERVED_STACK, so nothing to do here
                }
                else
                {
                    // 3 possible ways to get here:
                    // - We are in an epilog and haven't generated a probe in the prolog.
                    //   This shouldn't happen as we don't generate any calls in epilog.
                    // - We are in the prolog, but doing a call before generating the probe.
                    //   This shouldn't happen at all.
                    // - We are in the prolog, did not generate a probe but now we need
                    //   to generate a probe because we need a call (eg: profiler). We'll
                    //   need a probe.
                    //
                    // In any case, we need a probe

                    // Ignore the profiler callback for now.
                    if (!emitComp->compIsProfilerHookNeeded())
                    {
                        assert(!"We do not expect to get here");
                        emitComp->genGenerateStackProbe();
                    }
                }
            }
            else
            {
                // We will need a probe and will generate it in the prolog
                emitComp->genNeedPrologStackProbe = true;
            }
        }
    }
#endif // STACK_PROBES

    int argCnt;

    UNATIVE_OFFSET sz;
    instrDesc*     id;

    /* This is the saved set of registers after a normal call */
    unsigned savedSet = RBM_CALLEE_SAVED;

    /* Some special helper calls have a different set of saved registers */
    if (isNoGC)
    {
        // Get the set of registers that this call kills and remove it from the saved set.
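        // For example (illustrative; the authoritative masks come from
        // compNoGCHelperCallKillSet): a no-GC helper that clobbers only a couple of
        // scratch registers leaves the GC/byref info in all other integer registers
        // valid across the call, which is why we start from RBM_ALLINT below rather
        // than RBM_CALLEE_SAVED.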
        savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
    }
    else
    {
        assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
    }

    /* Trim out any callee-trashed registers from the live set */

    gcrefRegs &= savedSet;
    byrefRegs &= savedSet;

#ifdef DEBUG
    if (EMIT_GC_VERBOSE)
    {
        printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
        dumpConvertedVarSet(emitComp, ptrVars);
        printf(", gcrefRegs=");
        printRegMaskInt(gcrefRegs);
        emitDispRegSet(gcrefRegs);
        printf(", byrefRegs=");
        printRegMaskInt(byrefRegs);
        emitDispRegSet(byrefRegs);
        printf("\n");
    }
#endif

    assert(argSize % sizeof(void*) == 0);
    argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide

#ifdef DEBUGGING_SUPPORT
    /* Managed RetVal: emit sequence point for the call */
    if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
    {
        codeGen->genIPmappingAdd(ilOffset, false);
    }
#endif

    /*
        We need to allocate the appropriate instruction descriptor based
        on whether this is a direct/indirect call, and whether we need to
        record an updated set of live GC variables.

        The stats for a ton of classes are as follows:

            Direct call w/o  GC vars        220,216
            Indir. call w/o  GC vars        144,781

            Direct call with GC vars          9,440
            Indir. call with GC vars          5,768
     */

    if (callType >= EC_FUNC_VIRTUAL)
    {
        /* Indirect call, virtual calls */

        assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
               callType == EC_INDIR_C || callType == EC_INDIR_ARD);

        id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize);
    }
    else
    {
        /* Helper/static/nonvirtual/function calls (direct or through handle),
           and calls to an absolute addr. */

        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);

        id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize);
    }

    /* Update the emitter's live GC ref sets */

    VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
    emitThisGCrefRegs = gcrefRegs;
    emitThisByrefRegs = byrefRegs;

    /* Set the instruction - special case: jumping to a function */

    instruction ins = INS_call;

    if (isJump)
    {
        assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
        if (callType == EC_FUNC_TOKEN)
            ins = INS_l_jmp;
        else
            ins = INS_i_jmp;
    }
    id->idIns(ins);

    id->idSetIsNoGC(isNoGC);

    // Record the address: method, indirection, or funcptr
    if (callType >= EC_FUNC_VIRTUAL)
    {
        // This is an indirect call (either a virtual call or func ptr call)

        switch (callType)
        {
        case EC_INDIR_C:
            // Indirect call using an absolute code address.
            // Must be marked as relocatable and is done at the
            // branch target location.
            goto CALL_ADDR_MODE;

        case EC_INDIR_R: // the address is in a register

            id->idSetIsCallRegPtr();

            __fallthrough;

        case EC_INDIR_ARD: // the address is an indirection

            goto CALL_ADDR_MODE;

        case EC_INDIR_SR: // the address is in a lcl var

            id->idInsFmt(IF_SRD);
            // disp is really a lclVarNum
            noway_assert((unsigned)disp == (size_t)disp);
            id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);

            sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
            break;

        case EC_FUNC_VIRTUAL:

        CALL_ADDR_MODE:

            // fall-through

            // The function is "ireg" if id->idIsCallRegPtr(),
            // else [ireg+xmul*xreg+disp]

            id->idInsFmt(IF_ARD);

            id->idAddr()->iiaAddrMode.amBaseReg = ireg;
            id->idAddr()->iiaAddrMode.amIndxReg = xreg;
            id->idAddr()->iiaAddrMode.amScale   = xmul ?
emitEncodeScale(xmul) : emitter::OPSZ1; sz = emitInsSizeAM(id, insCodeMR(INS_call)); if (ireg == REG_NA && xreg == REG_NA) { if (codeGen->genCodeIndirAddrNeedsReloc(disp)) { id->idSetIsDspReloc(); } #ifdef _TARGET_AMD64_ else { // An absolute indir address that doesn't need reloc should fit within 32-bits // to be encoded as offset relative to zero. This addr mode requires an extra // SIB byte noway_assert((int)addr == (size_t)addr); sz++; } #endif //_TARGET_AMD64_ } break; default: NO_WAY("unexpected instruction"); break; } } else if (callType == EC_FUNC_TOKEN_INDIR) { /* "call [method_addr]" */ assert(addr != nullptr); id->idInsFmt(IF_METHPTR); id->idAddr()->iiaAddr = (BYTE*)addr; sz = 6; #if RELOC_SUPPORT // Since this is an indirect call through a pointer and we don't // currently pass in emitAttr into this function, we query codegen // whether addr needs a reloc. if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr)) { id->idSetIsDspReloc(); } #ifdef _TARGET_AMD64_ else { // An absolute indir address that doesn't need reloc should fit within 32-bits // to be encoded as offset relative to zero. This addr mode requires an extra // SIB byte noway_assert((int)addr == (size_t)addr); sz++; } #endif //_TARGET_AMD64_ #endif //RELOC_SUPPORT } else { /* This is a simple direct call: "call helper/method/addr" */ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR); assert(addr != nullptr); id->idInsFmt(IF_METHOD); sz = 5; id->idAddr()->iiaAddr = (BYTE*)addr; if (callType == EC_FUNC_ADDR) { id->idSetIsCallAddr(); } #if RELOC_SUPPORT // Direct call to a method and no addr indirection is needed. if (codeGen->genCodeAddrNeedsReloc((size_t)addr)) { id->idSetIsDspReloc(); } #endif } #ifdef DEBUG if (emitComp->verbose&&0) { if (id->idIsLargeCall()) { if (callType >= EC_FUNC_VIRTUAL) printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, VarSetOps::ToString(emitComp,((instrDescCGCA*)id)->idcGCvars)); else printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, VarSetOps::ToString(emitComp,((instrDescCGCA*)id)->idcGCvars)); } } #endif #if defined(DEBUG) || defined(LATE_DISASM) id->idDebugOnlyInfo()->idMemCookie = (size_t) methHnd; // method token id->idDebugOnlyInfo()->idClsCookie = 0; id->idDebugOnlyInfo()->idCallSig = sigInfo; #endif #if defined(LATE_DISASM) if (addr != nullptr) { codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); } #endif // defined(LATE_DISASM) id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; #if !FEATURE_FIXED_OUT_ARGS /* The call will pop the arguments */ if (emitCntStackDepth && argSize > 0) { noway_assert((ssize_t)emitCurStackLvl >= argSize); emitCurStackLvl -= (int)argSize; assert((int)emitCurStackLvl >= 0); } #endif // !FEATURE_FIXED_OUT_ARGS } #ifdef DEBUG /***************************************************************************** * * The following called for each recorded instruction -- use for debugging. */ void emitter::emitInsSanityCheck(instrDesc* id) { // make certain you only try to put relocs on things that can have them. 
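    // In rough terms (an illustrative summary of the asserts below): displacement
    // relocs belong on formats that carry an address-mode or data-section displacement
    // (ID_OP_AMD, ID_OP_DSP and friends), while constant relocs belong on formats that
    // carry an immediate operand (ID_OP_CNS and friends).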
    ID_OPS idOp = (ID_OPS) emitFmtToOps[id->idInsFmt()];

    if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
    {
        idOp = ID_OP_CNS;
    }

    if (!id->idIsTiny())
    {
        if (id->idIsDspReloc())
        {
            assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
                   idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
                   idOp == ID_OP_LBL);
        }

        if (id->idIsCnsReloc())
        {
            assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
                   idOp == ID_OP_CALL || idOp == ID_OP_JMP);
        }
    }
}
#endif

/*****************************************************************************
 *
 *  Return the allocated size (in bytes) of the given instruction descriptor.
 */

size_t emitter::emitSizeOfInsDsc(instrDesc* id)
{
    if (emitIsTinyInsDsc(id))
        return TINY_IDSC_SIZE;

    if (emitIsScnsInsDsc(id))
        return SMALL_IDSC_SIZE;

    assert((unsigned)id->idInsFmt() < emitFmtCount);

    ID_OPS idOp = (ID_OPS) emitFmtToOps[id->idInsFmt()];

    // An INS_call instruction may use a "fat" direct/indirect call descriptor
    // except for a local call to a label (i.e. call to a finally).
    // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
    // INS_call instruction always uses one of these idOps.

    if (id->idIns() == INS_call)
    {
        assert(idOp == ID_OP_CALL || // is a direct call
               idOp == ID_OP_SPEC || // is an indirect call
               idOp == ID_OP_JMP);   // is a local call to a finally clause
    }

    switch (idOp)
    {
    case ID_OP_NONE:
        break;

    case ID_OP_LBL:
        return sizeof(instrDescLbl);

    case ID_OP_JMP:
        return sizeof(instrDescJmp);

    case ID_OP_CALL:
    case ID_OP_SPEC:
        if (id->idIsLargeCall())
        {
            /* Must be a "fat" indirect call descriptor */
            return sizeof(instrDescCGCA);
        }

        __fallthrough;

    case ID_OP_SCNS:
    case ID_OP_CNS:
    case ID_OP_DSP:
    case ID_OP_DSP_CNS:
    case ID_OP_AMD:
    case ID_OP_AMD_CNS:
        if (id->idIsLargeCns())
        {
            if (id->idIsLargeDsp())
                return sizeof(instrDescCnsDsp);
            else
                return sizeof(instrDescCns);
        }
        else
        {
            if (id->idIsLargeDsp())
                return sizeof(instrDescDsp);
            else
                return sizeof(instrDesc);
        }

    default:
        NO_WAY("unexpected instruction descriptor format");
        break;
    }

    return sizeof(instrDesc);
}

/*****************************************************************************/
#ifdef DEBUG
/*****************************************************************************
 *
 *  Return a string that represents the given register.
*/ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) { static char rb[2][128]; static unsigned char rbc = 0; const char* rn = emitComp->compRegVarName(reg, varName); #ifdef _TARGET_AMD64_ char suffix = '\0'; switch (EA_SIZE(attr)) { case EA_32BYTE: return emitYMMregName(reg); break; case EA_16BYTE: return emitXMMregName(reg); break; case EA_8BYTE: break; case EA_4BYTE: if (reg > REG_R15) break; if (reg > REG_RDI) { suffix = 'd'; goto APPEND_SUFFIX; } rbc = (rbc+1)%2; rb[rbc][0] = 'e'; rb[rbc][1] = rn[1]; rb[rbc][2] = rn[2]; rb[rbc][3] = 0; rn = rb[rbc]; break; case EA_2BYTE: if (reg > REG_RDI) { suffix = 'w'; goto APPEND_SUFFIX; } rn++; break; case EA_1BYTE: if (reg > REG_RDI) { suffix = 'b'; APPEND_SUFFIX: rbc = (rbc+1)%2; rb[rbc][0] = rn[0]; rb[rbc][1] = rn[1]; if (rn[2]) { assert(rn[3] == 0); rb[rbc][2] = rn[2]; rb[rbc][3] = suffix; rb[rbc][4] = 0; } else { rb[rbc][2] = suffix; rb[rbc][3] = 0; } } else { rbc = (rbc+1)%2; rb[rbc][0] = rn[1]; if (reg < 4) { rb[rbc][1] = 'l'; rb[rbc][2] = 0; } else { rb[rbc][1] = rn[2]; rb[rbc][2] = 'l'; rb[rbc][3] = 0; } } rn = rb[rbc]; break; default: break; } #endif // _TARGET_AMD64_ #ifdef _TARGET_X86_ assert(strlen(rn) >= 3); switch (EA_SIZE(attr)) { case EA_4BYTE: break; case EA_2BYTE: rn++; break; case EA_1BYTE: rbc = (rbc+1)%2; rb[rbc][0] = rn[1]; rb[rbc][1] = 'l'; strcpy_s(&rb[rbc][2], sizeof(rb[0])-2, rn+3); rn = rb[rbc]; break; default: break; } #endif // _TARGET_X86_ #if 0 // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively, // however it's possibly not interesting most of the time. if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr)) { if (rn != rb[rbc]) { rbc = (rbc+1)%2; strcpy_s(rb[rbc], sizeof(rb[rbc]), rn); rn = rb[rbc]; } if (EA_IS_GCREF(attr)) { strcat_s(rb[rbc], sizeof(rb[rbc]), "*"); } else if (EA_IS_BYREF(attr)) { strcat_s(rb[rbc], sizeof(rb[rbc]), "^"); } } #endif // 0 return rn; } /***************************************************************************** * * Return a string that represents the given FP register. */ const char* emitter::emitFPregName(unsigned reg, bool varName) { assert(reg < REG_COUNT); return emitComp->compFPregVarName((regNumber)(reg), varName); } /***************************************************************************** * * Return a string that represents the given XMM register. */ const char* emitter::emitXMMregName(unsigned reg) { static const char* const regNames[] = { #define REGDEF(name, rnum, mask, sname) "x" sname, #ifndef LEGACY_BACKEND #include "register.h" #else // LEGACY_BACKEND #include "registerxmm.h" #endif // LEGACY_BACKEND }; assert(reg < REG_COUNT); assert(reg < sizeof(regNames)/sizeof(regNames[0])); return regNames[reg]; } /***************************************************************************** * * Return a string that represents the given YMM register. */ const char* emitter::emitYMMregName(unsigned reg) { static const char* const regNames[] = { #define REGDEF(name, rnum, mask, sname) "y" sname, #ifndef LEGACY_BACKEND #include "register.h" #else // LEGACY_BACKEND #include "registerxmm.h" #endif // LEGACY_BACKEND }; assert(reg < REG_COUNT); assert(reg < sizeof(regNames)/sizeof(regNames[0])); return regNames[reg]; } /***************************************************************************** * * Display a static data member reference. 
 */

void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
{
    int doffs;

    /* Filter out the special case of fs:[offs] */

    // Munge any pointers if we want diff-able disassembly
    if (emitComp->opts.disDiffable)
    {
        ssize_t top12bits = (offs >> 20);
        if ((top12bits != 0) && (top12bits != -1))
            offs = 0xD1FFAB1E;
    }

    if (fldHnd == FLD_GLOBAL_FS)
    {
        printf("FS:[0x%04X]", offs);
        return;
    }

    if (fldHnd == FLD_GLOBAL_DS)
    {
        printf("[0x%04X]", offs);
        return;
    }

    printf("[");

    doffs = Compiler::eeGetJitDataOffs(fldHnd);

#ifdef RELOC_SUPPORT
    if (reloc)
        printf("reloc ");
#endif

    if (doffs >= 0)
    {
        if (doffs & 1)
            printf("@CNS%02u", doffs - 1);
        else
            printf("@RWD%02u", doffs);

        if (offs)
            printf("%+Id", offs);
    }
    else
    {
        printf("classVar[%#x]", emitComp->dspPtr(fldHnd));

        if (offs)
            printf("%+Id", offs);
    }

    printf("]");

    if (emitComp->opts.varNames && offs < 0)
    {
        printf("'%s", emitComp->eeGetFieldName(fldHnd));
        if (offs)
            printf("%+Id", offs);
        printf("'");
    }
}

/*****************************************************************************
 *
 *  Display a stack frame reference.
 */

void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
{
    int  addr;
    bool bEBP;

    printf("[");

    if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
    {
        if (varx < 0)
            printf("TEMP_%02u", -varx);
        else
            printf("V%02u", +varx);

        if (disp < 0)
            printf("-0x%X", -disp);
        else if (disp > 0)
            printf("+0x%X", +disp);
    }

    if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
    {
        if (!asmfm)
            printf(" ");

        addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;

        if (bEBP)
        {
            printf(STR_FPBASE);

            if (addr < 0)
                printf("-%02XH", -addr);
            else if (addr > 0)
                printf("+%02XH", addr);
        }
        else
        {
            /* Adjust the offset by the amount currently pushed on the stack */

            printf(STR_SPBASE);

            if (addr < 0)
                printf("-%02XH", -addr);
            else if (addr > 0)
                printf("+%02XH", addr);

#if !FEATURE_FIXED_OUT_ARGS
            if (emitCurStackLvl)
                printf("+%02XH", emitCurStackLvl);
#endif // !FEATURE_FIXED_OUT_ARGS
        }
    }

    printf("]");

    if (varx >= 0 && emitComp->opts.varNames)
    {
        LclVarDsc*  varDsc;
        const char* varName;

        assert((unsigned)varx < emitComp->lvaCount);
        varDsc  = emitComp->lvaTable + varx;
        varName = emitComp->compLocalVarName(varx, offs);

        if (varName)
        {
            printf("'%s", varName);

            if (disp < 0)
                printf("-%d", -disp);
            else if (disp > 0)
                printf("+%d", +disp);

            printf("'");
        }
    }
}

/*****************************************************************************
 *
 *  Display a reloc value.
 *  If we are formatting for an assembly listing, don't print the hex value,
 *  since it will prevent us from doing assembly diffs.
 */
void emitter::emitDispReloc(ssize_t value)
{
    if (emitComp->opts.disAsm)
    {
        printf("(reloc)");
    }
    else
    {
        printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
    }
}

/*****************************************************************************
 *
 *  Display an address mode.
 */

void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
{
    bool         nsep = false;
    ssize_t      disp;

    unsigned     jtno = 0;
    dataSection* jdsc = 0;

    /* The displacement field is in an unusual place for calls */

    disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);

    /* Display a jump table label if this is a switch table jump */

    if (id->idIns() == INS_i_jmp)
    {
        UNATIVE_OFFSET offs = 0;

        /* Find the appropriate entry in the data section list */

        for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
        {
            UNATIVE_OFFSET size = jdsc->dsSize;

            /* Is this a label table?
 */

            if (size & 1)
            {
                size--;
                jtno++;

                if (offs == id->idDebugOnlyInfo()->idMemCookie)
                    break;
            }

            offs += size;
        }

        /* If we've found a matching entry then this is a table jump */

        if (jdsc)
        {
#ifdef RELOC_SUPPORT
            if (id->idIsDspReloc())
            {
                printf("reloc ");
            }
#endif
            printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
        }

        disp -= id->idDebugOnlyInfo()->idMemCookie;
    }

    bool frameRef = false;

    printf("[");

    if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
    {
        printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
        nsep = true;
        if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
            frameRef = true;
        else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
            frameRef = true;
    }

    if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
    {
        size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);

        if (nsep)
            printf("+");
        if (scale > 1)
            printf("%u*", scale);
        printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
        nsep = true;
    }

#ifdef RELOC_SUPPORT
    if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
    {
        if (nsep)
            printf("+");
        emitDispReloc(disp);
    }
    else
#endif
    {
        // Munge any pointers if we want diff-able disassembly
        if (emitComp->opts.disDiffable)
        {
            ssize_t top12bits = (disp >> 20);
            if ((top12bits != 0) && (top12bits != -1))
                disp = 0xD1FFAB1E;
        }

        if (disp > 0)
        {
            if (nsep)
                printf("+");
            if (frameRef)
                printf("%02XH", disp);
            else if (disp < 1000)
                printf("%d", disp);
            else if (disp <= 0xFFFF)
                printf("%04XH", disp);
            else
                printf("%08XH", disp);
        }
        else if (disp < 0)
        {
            if (frameRef)
                printf("-%02XH", -disp);
            else if (disp > -1000)
                printf("-%d", -disp);
            else if (disp >= -0xFFFF)
                printf("-%04XH", -disp);
            else if ((disp & 0x7F000000) != 0x7F000000)
                printf("%08XH", disp);
            else
                printf("-%08XH", -disp);
        }
        else if (!nsep)
        {
            printf("%04XH", disp);
        }
    }

    printf("]");

    if (id->idDebugOnlyInfo()->idClsCookie)
    {
        if (id->idIns() == INS_call)
            printf("%s", emitFncName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
        else
            printf("%s", emitFldName((CORINFO_FIELD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
    }
    // pretty print string if it looks like one
    else if (id->idGCref() == GCT_GCREF && id->idIns() == INS_mov &&
             id->idAddr()->iiaAddrMode.amBaseReg == REG_NA)
    {
        const wchar_t* str = emitComp->eeGetCPString(disp);
        if (str != 0)
            printf(" '%S'", str);
    }

    if (jdsc && !noDetail)
    {
        unsigned     cnt = (jdsc->dsSize - 1) / sizeof(void*);
        BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;

#ifdef _TARGET_AMD64_
#define SIZE_LETTER "Q"
#else
#define SIZE_LETTER "D"
#endif
        printf("\n\n    J_M%03u_DS%02u LABEL   " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);

        /* Display the label table (it's stored as "BasicBlock*" values) */

        do
        {
            insGroup* lab;

            /* Convert the BasicBlock* value to an IG address */

            lab = (insGroup*)emitCodeGetCookie(*bbp++);
            assert(lab);

            printf("\n            D" SIZE_LETTER "      G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
        } while (--cnt);
    }
}

/*****************************************************************************
 *
 *  If the given instruction is a shift, display the 2nd operand.
*/ void emitter::emitDispShift(instruction ins, int cnt) { switch (ins) { case INS_rcl_1: case INS_rcr_1: case INS_rol_1: case INS_ror_1: case INS_shl_1: case INS_shr_1: case INS_sar_1: printf(", 1"); break; case INS_rcl: case INS_rcr: case INS_rol: case INS_ror: case INS_shl: case INS_shr: case INS_sar: printf(", cl"); break; case INS_rcl_N: case INS_rcr_N: case INS_rol_N: case INS_ror_N: case INS_shl_N: case INS_shr_N: case INS_sar_N: printf(", %d", cnt); break; default: break; } } /***************************************************************************** * * Display (optionally) the bytes for the instruction encoding in hex */ void emitter::emitDispInsHex(BYTE * code, size_t sz) { // We do not display the instruction hex if we want diff-able disassembly if (!emitComp->opts.disDiffable) { #ifdef _TARGET_AMD64_ // how many bytes per instruction we format for const size_t digits = 10; #else // _TARGET_X86 const size_t digits = 6; #endif printf(" "); for (unsigned i=0; i < sz; i++) { printf("%02X", (*((BYTE *) (code+i)))); } if (sz < digits) { printf("%.*s", 2 * (digits - sz), " "); } } } /***************************************************************************** * * Display the given instruction. */ void emitter::emitDispIns(instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig) { emitAttr attr; const char* sstr; instruction ins = id->idIns(); if (emitComp->verbose) { unsigned idNum = id->idDebugOnlyInfo()->idNum; printf("IN%04x: ", idNum); } #ifdef RELOC_SUPPORT # define ID_INFO_DSP_RELOC ((bool) (id->idIsDspReloc())) #else # define ID_INFO_DSP_RELOC false #endif /* Display a constant value if the instruction references one */ if (!isNew) { switch (id->idInsFmt()) { int offs; case IF_MRD_RRD: case IF_MWR_RRD: case IF_MRW_RRD: case IF_RRD_MRD: case IF_RWR_MRD: case IF_RRW_MRD: case IF_MRD_CNS: case IF_MWR_CNS: case IF_MRW_CNS: case IF_MRW_SHF: case IF_MRD: case IF_MWR: case IF_MRW: #if FEATURE_STACK_FP_X87 case IF_TRD_MRD: case IF_TWR_MRD: case IF_TRW_MRD: // case IF_MRD_TRD: case IF_MWR_TRD: // case IF_MRW_TRD: #endif // FEATURE_STACK_FP_X87 case IF_MRD_OFF: /* Is this actually a reference to a data section? */ offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd); if (offs >= 0) { void* addr; /* Display a data section reference */ assert((unsigned)offs < emitConsDsc.dsdOffs); addr = emitConsBlock ? emitConsBlock + offs : nullptr; #if 0 // TODO-XArch-Cleanup: Fix or remove this code. /* Is the operand an integer or floating-point value? */ bool isFP = false; if (CodeGen::instIsFP(id->idIns())) { switch (id->idIns()) { case INS_fild: case INS_fildl: break; default: isFP = true; break; } } if (offs & 1) printf("@CNS%02u", offs); else printf("@RWD%02u", offs); printf(" "); if (addr) { addr = 0; // TODO-XArch-Bug?: // This was busted by switching the order // in which we output the code block vs. // the data blocks -- when we get here, // the data block has not been filled in // yet, so we'll display garbage. if (isFP) { if (id->idOpSize() == EA_4BYTE) printf("DF %f \n", addr ? *(float *)addr : 0); else printf("DQ %lf\n", addr ? *(double *)addr : 0); } else { if (id->idOpSize() <= EA_4BYTE) printf("DD %d \n", addr ? *(int *)addr : 0); else printf("DQ %D \n", addr ? 
*(__int64 *)addr : 0); } } #endif } break; default: break; } } // printf("[F=%s] " , emitIfName(id->idInsFmt())); // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum); // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth); // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32)); // printf("[A=%08X] " , emitSimpleStkMask); // printf("[A=%08X] " , emitSimpleByrefStkMask); // printf("[L=%02u] " , id->idCodeSize()); if (!emitComp->opts.dspEmit && !isNew && !asmfm) doffs = true; /* Display the instruction offset */ emitDispInsOffs(offset, doffs); if (code != nullptr) { /* Display the instruction hex code */ emitDispInsHex(code, sz); } /* Display the instruction name */ sstr = codeGen->genInsName(ins); #ifdef FEATURE_AVX_SUPPORT if (IsAVXInstruction(ins)) { printf(" v%-8s", sstr); } else #endif // FEATURE_AVX_SUPPORT { printf(" %-9s", sstr); } #ifndef FEATURE_PAL if (strnlen_s(sstr, 10) > 8) #else // FEATURE_PAL if (strnlen(sstr, 10) > 8) #endif // FEATURE_PAL { printf(" "); } /* By now the size better be set to something */ assert(emitInstCodeSz(id) || emitInstHasNoCode(ins)); /* Figure out the operand size */ if (id->idGCref() == GCT_GCREF) { attr = EA_GCREF; sstr = "gword ptr "; } else if (id->idGCref() == GCT_BYREF) { attr = EA_BYREF; sstr = "bword ptr "; } else { attr = id->idOpSize(); sstr = codeGen->genSizeStr(attr); if (ins == INS_lea) { #ifdef _TARGET_AMD64_ assert((attr == EA_4BYTE) || (attr == EA_8BYTE)); #else assert(attr == EA_4BYTE); #endif sstr = ""; } } /* Now see what instruction format we've got */ // First print the implicit register usage if (instrHasImplicitRegPairDest(ins)) { printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize())); } else if (instrIs3opImul(ins)) { regNumber tgtReg = inst3opImulReg(ins); printf("%s, ", emitRegName(tgtReg, id->idOpSize())); } switch (id->idInsFmt()) { ssize_t val; ssize_t offs; CnsVal cnsVal; const char* methodName; case IF_CNS: val = emitGetInsSC(id); #ifdef _TARGET_AMD64_ // no 8-byte immediates allowed here! 
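        // That is, val must be representable as a sign-extended 32-bit immediate:
        // INT32_MIN <= val <= INT32_MAX (the hex bounds below).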
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); #endif #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) emitDispReloc(val); else #endif { PRINT_CONSTANT: // Munge any pointers if we want diff-able disassembly if (emitComp->opts.disDiffable) { ssize_t top12bits = (val >> 20); if ((top12bits != 0) && (top12bits != -1)) val = 0xD1FFAB1E; } if ((val > -1000) && (val < 1000)) printf("%d", val); else if ((val > 0) || ((val & 0x7F000000) != 0x7F000000)) printf("0x%IX", val); else // (val < 0) printf("-0x%IX", -val); } break; case IF_ARD: case IF_AWR: case IF_ARW: #if FEATURE_STACK_FP_X87 case IF_TRD_ARD: case IF_TWR_ARD: case IF_TRW_ARD: // case IF_ARD_TRD: case IF_AWR_TRD: // case IF_ARW_TRD: #endif // FEATURE_STACK_FP_X87 if (ins == INS_call && id->idIsCallRegPtr()) { printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg)); break; } printf("%s", sstr); emitDispAddrMode(id, isNew); emitDispShift(ins); if (ins == INS_call) { assert(id->idInsFmt() == IF_ARD); /* Ignore indirect calls */ if (id->idDebugOnlyInfo()->idMemCookie == 0) break; assert(id->idDebugOnlyInfo()->idMemCookie); /* This is a virtual call */ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); printf("%s", methodName); } break; case IF_RRD_ARD: case IF_RWR_ARD: case IF_RRW_ARD: if (IsAVXInstruction(ins)) { printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr); } else if (IsSSE2Instruction(ins)) { printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr); } else #ifdef _TARGET_AMD64_ if (ins == INS_movsxd) { printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr); } else #endif if (ins == INS_movsx || ins == INS_movzx) { printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr); } else { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); } emitDispAddrMode(id); break; case IF_ARD_RRD: case IF_AWR_RRD: case IF_ARW_RRD: printf("%s", sstr); emitDispAddrMode(id); if (IsAVXInstruction(ins)) { printf(", %s", emitYMMregName((unsigned)id->idReg1())); } else if (IsSSE2Instruction(ins)) { printf(", %s", emitXMMregName((unsigned)id->idReg1())); } else { printf(", %s", emitRegName(id->idReg1(), attr)); } break; case IF_ARD_CNS: case IF_AWR_CNS: case IF_ARW_CNS: case IF_ARW_SHF: printf("%s", sstr); emitDispAddrMode(id); emitGetInsAmdCns(id, &cnsVal); val = cnsVal.cnsVal; #ifdef _TARGET_AMD64_ // no 8-byte immediates allowed here! 
        assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
        if (id->idInsFmt() == IF_ARW_SHF)
            emitDispShift(ins, (BYTE)val);
        else
        {
            printf(", ");
#ifdef RELOC_SUPPORT
            if (cnsVal.cnsReloc)
                emitDispReloc(val);
            else
#endif
                goto PRINT_CONSTANT;
        }
        break;

    case IF_SRD:
    case IF_SWR:
    case IF_SRW:
#if FEATURE_STACK_FP_X87
    case IF_TRD_SRD:
    case IF_TWR_SRD:
    case IF_TRW_SRD:
    // case IF_SRD_TRD:
    case IF_SWR_TRD:
    // case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87

        printf("%s", sstr);

#if !FEATURE_FIXED_OUT_ARGS
        if (ins == INS_pop)
            emitCurStackLvl -= sizeof(int);
#endif

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

#if !FEATURE_FIXED_OUT_ARGS
        if (ins == INS_pop)
            emitCurStackLvl += sizeof(int);
#endif

        emitDispShift(ins);
        break;

    case IF_SRD_RRD:
    case IF_SWR_RRD:
    case IF_SRW_RRD:

        printf("%s", sstr);

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

        if (IsAVXInstruction(ins))
        {
            printf(", %s", emitYMMregName((unsigned)id->idReg1()));
        }
        else if (IsSSE2Instruction(ins))
        {
            printf(", %s", emitXMMregName((unsigned)id->idReg1()));
        }
        else
        {
            printf(", %s", emitRegName(id->idReg1(), attr));
        }
        break;

    case IF_SRD_CNS:
    case IF_SWR_CNS:
    case IF_SRW_CNS:
    case IF_SRW_SHF:

        printf("%s", sstr);

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

        emitGetInsCns(id, &cnsVal);
        val = cnsVal.cnsVal;
#ifdef _TARGET_AMD64_
        // no 8-byte immediates allowed here!
        assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
        if (id->idInsFmt() == IF_SRW_SHF)
            emitDispShift(ins, (BYTE)val);
        else
        {
            printf(", ");
#ifdef RELOC_SUPPORT
            if (cnsVal.cnsReloc)
            {
                emitDispReloc(val);
            }
            else
#endif
            {
                goto PRINT_CONSTANT;
            }
        }
        break;

    case IF_RRD_SRD:
    case IF_RWR_SRD:
    case IF_RRW_SRD:
        if (IsAVXInstruction(ins))
        {
            printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
        }
        else if (IsSSE2Instruction(ins))
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
        }
        else
#ifdef _TARGET_AMD64_
        if (ins == INS_movsxd)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
        }
        else
#endif
        if (ins == INS_movsx || ins == INS_movzx)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
        }
        else
        {
            printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
        }

        emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                         id->idDebugOnlyInfo()->idVarRefOffs, asmfm);

        break;

    case IF_RRD_RRD:
    case IF_RWR_RRD:
    case IF_RRW_RRD:
        if (ins == INS_mov_i2xmm)
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
        }
        else if (ins == INS_mov_xmm2i)
        {
            printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1()));
        }
        else if (ins == INS_cvttsd2si)
        {
            printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2()));
        }
        else if (IsAVXInstruction(ins))
        {
            printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2()));
        }
        else if (IsSSE2Instruction(ins))
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2()));
        }
#ifdef _TARGET_AMD64_
        else if (ins == INS_movsxd)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
        }
#endif // _TARGET_AMD64_
        else if (ins == INS_movsx || ins == INS_movzx)
        {
            printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
        }
        else
        {
            printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
        }
        break;

    case IF_RRW_RRW:
        assert(ins == INS_xchg);
        printf("%s,", emitRegName(id->idReg1(), attr));
        printf(" %s", emitRegName(id->idReg2(), attr));
        break;

#ifdef FEATURE_AVX_SUPPORT
    case IF_RWR_RRD_RRD:
        assert(IsAVXInstruction(ins));
        assert(IsThreeOperandAVXInstruction(ins));
        printf("%s, ", emitRegName(id->idReg1(), attr));
        printf("%s, ", emitRegName(id->idReg2(), attr));
        printf("%s", emitRegName(id->idReg3(), attr));
        break;
#endif

    case IF_RRW_RRW_CNS:
        if (IsAVXInstruction(ins))
        {
            printf("%s,", emitYMMregName((unsigned)id->idReg1()));
            printf(" %s", emitYMMregName((unsigned)id->idReg2()));
        }
        else
        {
            printf("%s,", emitRegName(id->idReg1(), attr));
            printf(" %s", emitRegName(id->idReg2(), attr));
        }
        val = emitGetInsSC(id);
#ifdef _TARGET_AMD64_
        // no 8-byte immediates allowed here!
        assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
#endif
        printf(", ");
#ifdef RELOC_SUPPORT
        if (id->idIsCnsReloc())
            emitDispReloc(val);
        else
#endif
            goto PRINT_CONSTANT;
        break;

    case IF_RRD:
    case IF_RWR:
    case IF_RRW:
        printf("%s", emitRegName(id->idReg1(), attr));
        emitDispShift(ins);
        break;

    case IF_RRW_SHF:
        printf("%s", emitRegName(id->idReg1(), attr));
        emitDispShift(ins, (BYTE)emitGetInsSC(id));
        break;

    case IF_RRD_MRD:
    case IF_RWR_MRD:
    case IF_RRW_MRD:

        if (ins == INS_movsx || ins == INS_movzx)
            attr = EA_PTRSIZE;
#ifdef _TARGET_AMD64_
        else if (ins == INS_movsxd)
        {
            attr = EA_PTRSIZE;
        }
#endif
        if (IsAVXInstruction(ins))
        {
            printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
        }
        else if (IsSSE2Instruction(ins))
        {
            printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
        }
        else
        {
            printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
        }
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        break;

    case IF_RWR_MRD_OFF:

        printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        break;

    case IF_MRD_RRD:
    case IF_MWR_RRD:
    case IF_MRW_RRD:

        printf("%s", sstr);
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        printf(", %s", emitRegName(id->idReg1(), attr));
        break;

    case IF_MRD_CNS:
    case IF_MWR_CNS:
    case IF_MRW_CNS:
    case IF_MRW_SHF:

        printf("%s", sstr);
        offs = emitGetInsDsp(id);
        emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
        emitGetInsDcmCns(id, &cnsVal);
        val = cnsVal.cnsVal;
#ifdef _TARGET_AMD64_
        // no 8-byte immediates allowed here!
assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL)); #endif #ifdef RELOC_SUPPORT if (cnsVal.cnsReloc) emitDispReloc(val); else #endif if (id->idInsFmt() == IF_MRW_SHF) emitDispShift(ins, (BYTE)val); else { printf(", "); goto PRINT_CONSTANT; } break; case IF_MRD: case IF_MWR: case IF_MRW: #if FEATURE_STACK_FP_X87 case IF_TRD_MRD: case IF_TWR_MRD: case IF_TRW_MRD: // case IF_MRD_TRD: case IF_MWR_TRD: // case IF_MRW_TRD: #endif // FEATURE_STACK_FP_X87 printf("%s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); emitDispShift(ins); break; case IF_MRD_OFF: printf("offset "); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); break; case IF_RRD_CNS: case IF_RWR_CNS: case IF_RRW_CNS: printf("%s, ", emitRegName(id->idReg1(), attr)); val = emitGetInsSC(id); #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) emitDispReloc(val); else #endif goto PRINT_CONSTANT; break; #if FEATURE_STACK_FP_X87 case IF_TRD_FRD: case IF_TWR_FRD: case IF_TRW_FRD: switch (ins) { case INS_fld: case INS_fxch: break; default: printf("%s, ", emitFPregName(0)); break; } printf("%s", emitFPregName((unsigned)id->idReg1())); break; case IF_FRD_TRD: case IF_FWR_TRD: case IF_FRW_TRD: printf("%s", emitFPregName((unsigned)id->idReg1())); if (ins != INS_fst && ins != INS_fstp) printf(", %s", emitFPregName(0)); break; #endif // FEATURE_STACK_FP_X87 case IF_LABEL: case IF_RWR_LABEL: case IF_SWR_LABEL: if (ins == INS_lea) { printf("%s, ", emitRegName(id->idReg1(), attr)); } else if (ins == INS_mov) { /* mov dword ptr [frame.callSiteReturnAddress], label */ assert(id->idInsFmt() == IF_SWR_LABEL); instrDescLbl* idlbl = (instrDescLbl*)id; emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm); printf(", "); } if (((instrDescJmp*)id)->idjShort) printf("SHORT "); if (id->idIsBound()) { printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum); } else { printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum); } break; case IF_METHOD: case IF_METHPTR: if (id->idIsCallAddr()) { offs = (ssize_t)id->idAddr()->iiaAddr; methodName = ""; } else { offs = 0; methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); } if (id->idInsFmt() == IF_METHPTR) printf("["); if (offs) { if (id->idIsDspReloc()) printf("reloc "); printf("%08X", offs); } else { printf("%s", methodName); } if (id->idInsFmt() == IF_METHPTR) printf("]"); break; #if FEATURE_STACK_FP_X87 case IF_TRD: case IF_TWR: case IF_TRW: #endif // FEATURE_STACK_FP_X87 case IF_NONE: break; default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); break; } if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose)) { // Code size in the instrDesc is different from the actual code size we've been given! printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz); } printf("\n"); } /*****************************************************************************/ #endif /***************************************************************************** * * Output nBytes bytes of NOP instructions */ static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes) { assert(nBytes <= 15); #ifndef _TARGET_AMD64_ // TODO-X86-CQ: when VIA C3 CPU's are out of circulation, switch to the // more efficient real NOP: 0x0F 0x1F +modR/M // Also can't use AMD recommended, multiple size prefixes (i.e. 
0x66 0x66 0x90 for 3 byte NOP) // because debugger and msdis don't like it, so maybe VIA doesn't either // So instead just stick to repeating single byte nops switch (nBytes) { case 15: *dst++ = 0x90; __fallthrough; case 14: *dst++ = 0x90; __fallthrough; case 13: *dst++ = 0x90; __fallthrough; case 12: *dst++ = 0x90; __fallthrough; case 11: *dst++ = 0x90; __fallthrough; case 10: *dst++ = 0x90; __fallthrough; case 9: *dst++ = 0x90; __fallthrough; case 8: *dst++ = 0x90; __fallthrough; case 7: *dst++ = 0x90; __fallthrough; case 6: *dst++ = 0x90; __fallthrough; case 5: *dst++ = 0x90; __fallthrough; case 4: *dst++ = 0x90; __fallthrough; case 3: *dst++ = 0x90; __fallthrough; case 2: *dst++ = 0x90; __fallthrough; case 1: *dst++ = 0x90; break; case 0: break; } #else // _TARGET_AMD64_ switch (nBytes) { case 2: *dst++ = 0x66; __fallthrough; case 1: *dst++ = 0x90; break; case 0: break; case 3: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x00; break; case 4: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x40; *dst++ = 0x00; break; case 6: *dst++ = 0x66; __fallthrough; case 5: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x44; *dst++ = 0x00; *dst++ = 0x00; break; case 7: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x80; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; break; case 15: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 7), 8); break; case 14: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 7), 7); break; case 13: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 5), 8); break; case 12: // More than 3 prefixes is slower than just 2 NOPs dst = emitOutputNOP(emitOutputNOP(dst, 4), 8); break; case 11: *dst++ = 0x66; __fallthrough; case 10: *dst++ = 0x66; __fallthrough; case 9: *dst++ = 0x66; __fallthrough; case 8: *dst++ = 0x0F; *dst++ = 0x1F; *dst++ = 0x84; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; break; } #endif // _TARGET_AMD64_ return dst; } /***************************************************************************** * * Output an instruction involving an address mode. */ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc) { regNumber reg; regNumber rgx; ssize_t dsp; bool dspInByte; bool dspIsZero; instruction ins = id->idIns(); emitAttr size = id->idOpSize(); size_t opsz = EA_SIZE_IN_BYTES(size); // Get the base/index registers reg = id->idAddr()->iiaAddrMode.amBaseReg; rgx = id->idAddr()->iiaAddrMode.amIndxReg; // For INS_call the instruction size is actually the return value size if (ins == INS_call) { // Special case: call via a register if (id->idIsCallRegPtr()) { size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call)); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode); dst += emitOutputWord(dst, opcode); goto DONE; } // The displacement field is in an unusual place for calls dsp = emitGetInsCIdisp(id); #ifdef _TARGET_AMD64_ // Compute the REX prefix if it exists if (IsExtendedReg(reg, EA_PTRSIZE)) { insEncodeReg012(ins, reg, EA_PTRSIZE, &code); reg = RegEncoding(reg); } if (IsExtendedReg(rgx, EA_PTRSIZE)) { insEncodeRegSIB(ins, rgx, &code); rgx = RegEncoding(rgx); } // And emit the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); #endif // _TARGET_AMD64_ goto GOT_DSP; } // Is there a large constant operand? if (addc && (size > EA_1BYTE)) { ssize_t cval = addc->cnsVal; // Does the constant fit in a byte? 
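        // If it does, and the instruction has a sign-extended imm8 form, select that form
        // by setting the 's' bit (0x02) in the opcode below. E.g. (illustrative encodings)
        // 'add eax, 8' can then be emitted as 83 C0 08 rather than 81 C0 08 00 00 00.
        // mov and test have no such short form, hence their exclusion.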
        if ((signed char)cval == cval &&
#ifdef RELOC_SUPPORT
            addc->cnsReloc == false &&
#endif
            ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_ARW_SHF)
                code |= 2;

            opsz = 1;
        }
    }

    // Emit VEX prefix if required.
    // There are some callers who already add the VEX prefix and then call this routine.
    // Therefore, add the VEX prefix only if one is not already present.
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // For this format, moves do not support a third operand, so we only need to handle the binary ops.
    if (IsThreeOperandBinaryAVXInstruction(ins))
    {
        // Encode the source operand's register in the 'vvvv' bits in 1's complement form.
        // For the two-operand form, the destination register (reg1) is duplicated as a source.
        code = insEncodeReg3456(ins, id->idReg1(), size, code);
    }

    // Add the REX.W prefix if required
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    if (IsExtendedReg(reg, EA_PTRSIZE))
    {
        insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
        reg = RegEncoding(reg);
    }

    if (IsExtendedReg(rgx, EA_PTRSIZE))
    {
        insEncodeRegSIB(ins, rgx, &code);
        rgx = RegEncoding(rgx);
    }

    // Is this a 'big' opcode?
    if (code & 0xFF000000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest word of the opcode
        // We need to check again as in case of AVX instructions leading opcode bytes are stripped off
        // and encoded as part of VEX prefix.
        if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
            code &= 0x0000FFFF;
        }
    }
    else if (code & 0x00FF0000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest byte of the opcode
        if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        // Use the large version if this is not a byte. This trick will not
        // work in case of SSE2 and AVX instructions.
        if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
            code++;
    }
    else if (CodeGen::instIsFP(ins))
    {
#if FEATURE_STACK_FP_X87
        assert(size == EA_4BYTE || size == EA_8BYTE || ins == INS_fldcw || ins == INS_fnstcw);
#else  // !FEATURE_STACK_FP_X87
        assert(size == EA_4BYTE || size == EA_8BYTE);
#endif // ! FEATURE_STACK_FP_X87

        if (size == EA_8BYTE)
            code += 4;
    }
    else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
    {
        /* Is the operand size larger than a byte? */

        switch (size)
        {
        case EA_1BYTE:
            break;

        case EA_2BYTE:

            /* Output a size prefix for a 16-bit operand */

            dst += emitOutputByte(dst, 0x66);

            __fallthrough;

        case EA_4BYTE:
            AMD64_ONLY(case EA_8BYTE:)

            /* Set the 'w' bit to get the large version */

            code |= 0x1;
            break;

#ifdef _TARGET_X86_
        case EA_8BYTE:

            /* Double operand - set the appropriate bit */

            code |= 0x04;
            break;
#endif // _TARGET_X86_

        default:
            NO_WAY("unexpected size");
            break;
        }
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Get the displacement value
    dsp = emitGetInsAmdAny(id);

GOT_DSP:

    dspInByte = ((signed char)dsp == (ssize_t)dsp);
    dspIsZero = (dsp == 0);

#ifdef RELOC_SUPPORT
    if (id->idIsDspReloc())
    {
        dspInByte = false; // relocs can't be placed in a byte
    }
#endif

    // Is there a [scaled] index component?
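    // Quick ModRM refresher for the cases below (standard x86 encoding, stated here as a
    // reading aid): mod=00 with rm=101 means [disp32] (RIP-relative on AMD64); mod=01
    // adds a disp8 and mod=10 a disp32 to the base register; rm=100 means a SIB byte
    // follows (required for ESP-based and scaled-index forms); and a plain [EBP] must be
    // encoded as [EBP+0], since its mod=00 slot is taken by [disp32].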
    if (rgx == REG_NA)
    {
        // The address is of the form "[reg+disp]"
        switch (reg)
        {
        case REG_NA:

            if (id->idIsDspReloc())
            {
                INT32 addlDelta = 0;

                // The address is of the form "[disp]"
                // On x86   - disp is relative to zero
                // On Amd64 - disp is relative to RIP
                dst += emitOutputWord(dst, code | 0x0500);

                if (addc)
                {
                    // It is of the form "ins [disp], immed"
                    // For emitting relocation, we also need to take into account the
                    // additional bytes of code emitted for the immediate value.
                    ssize_t cval = addc->cnsVal;
#ifdef _TARGET_AMD64_
                    // all these opcodes only take a sign-extended 4-byte immediate
                    noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
#else
                    noway_assert(opsz <= 4);
#endif
                    switch (opsz)
                    {
                    case 0:
                    case 4:
                    case 8:
                        addlDelta = -4;
                        break;
                    case 2:
                        addlDelta = -2;
                        break;
                    case 1:
                        addlDelta = -1;
                        break;

                    default:
                        assert(!"unexpected operand size");
                        unreached();
                    }
                }

#ifdef _TARGET_AMD64_
                // We emit zero on Amd64, to avoid the assert in emitOutputLong()
                dst += emitOutputLong(dst, 0);
#else
                dst += emitOutputLong(dst, dsp);
#endif
                emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
                                     addlDelta);
            }
            else
            {
#ifdef _TARGET_X86_
                dst += emitOutputWord(dst, code | 0x0500);
#else  //_TARGET_AMD64_
                // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
                // This addr mode should never be used while generating relocatable ngen code nor if
                // the addr can be encoded as a pc-relative address.
                noway_assert(!emitComp->opts.compReloc);
                noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
                noway_assert((int)dsp == dsp);

                // This requires specifying a SIB byte after the ModRM byte.
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, 0x25);
#endif //_TARGET_AMD64_
                dst += emitOutputLong(dst, dsp);
            }
            break;

        case REG_EBP:

            // Does the offset fit in a byte?
            if (dspInByte)
            {
                dst += emitOutputWord(dst, code | 0x4500);
                dst += emitOutputByte(dst, dsp);
            }
            else
            {
                dst += emitOutputWord(dst, code | 0x8500);
                dst += emitOutputLong(dst, dsp);

#ifdef RELOC_SUPPORT
                if (id->idIsDspReloc())
                {
                    emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                }
#endif
            }
            break;

        case REG_ESP:

#ifdef LEGACY_BACKEND
            // REG_ESP could be REG_R12, which applies to any instruction
            //
            // This assert isn't too helpful from the OptJit point of view
            //
            // a better question is why is it here at all
            //
            assert((ins == INS_lea) || (ins == INS_mov) || (ins == INS_test) || (ins == INS_cmp) ||
                   (ins == INS_fld && dspIsZero) || (ins == INS_fstp && dspIsZero) ||
                   (ins == INS_fistp && dspIsZero) || IsSSE2Instruction(ins) || IsAVXInstruction(ins) ||
                   (ins == INS_or));
#endif // LEGACY_BACKEND

            // Is the offset 0 or does it at least fit in a byte?
            if (dspIsZero)
            {
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, 0x24);
            }
            else if (dspInByte)
            {
                dst += emitOutputWord(dst, code | 0x4400);
                dst += emitOutputByte(dst, 0x24);
                dst += emitOutputByte(dst, dsp);
            }
            else
            {
                dst += emitOutputWord(dst, code | 0x8400);
                dst += emitOutputByte(dst, 0x24);
                dst += emitOutputLong(dst, dsp);
#ifdef RELOC_SUPPORT
                if (id->idIsDspReloc())
                {
                    emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                }
#endif
            }
            break;

        default:

            // Put the register in the opcode
            code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;

            // Is there a displacement?
            if (dspIsZero)
            {
                // This is simply "[reg]"
                dst += emitOutputWord(dst, code);
            }
            else
            {
                // This is "[reg+dsp]" -- does the offset fit in a byte?
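                // e.g. (illustrative) 'mov eax, [esi+8]' encodes as 8B 46 08 (mod=01,
                // disp8), while 'mov eax, [esi+0x12345]' needs 8B 86 45 23 01 00
                // (mod=10, disp32).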
                if (dspInByte)
                {
                    dst += emitOutputWord(dst, code | 0x4000);
                    dst += emitOutputByte(dst, dsp);
                }
                else
                {
                    dst += emitOutputWord(dst, code | 0x8000);
                    dst += emitOutputLong(dst, dsp);
#ifdef RELOC_SUPPORT
                    if (id->idIsDspReloc())
                    {
                        emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                    }
#endif
                }
            }
            break;
        }
    }
    else
    {
        unsigned regByte;

        // We have a scaled index operand
        unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);

        // Is the index operand scaled?
        if (mul > 1)
        {
            // Is there a base register?
            if (reg != REG_NA)
            {
                // The address is "[reg + {2/4/8} * rgx + icon]"
                regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
                          insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);

                // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
                if (dspIsZero && reg != REG_EBP)
                {
                    // The address is "[reg + {2/4/8} * rgx]"
                    dst += emitOutputWord(dst, code | 0x0400);
                    dst += emitOutputByte(dst, regByte);
                }
                else
                {
                    // The address is "[reg + {2/4/8} * rgx + disp]"
                    if (dspInByte)
                    {
                        dst += emitOutputWord(dst, code | 0x4400);
                        dst += emitOutputByte(dst, regByte);
                        dst += emitOutputByte(dst, dsp);
                    }
                    else
                    {
                        dst += emitOutputWord(dst, code | 0x8400);
                        dst += emitOutputByte(dst, regByte);
                        dst += emitOutputLong(dst, dsp);

#ifdef RELOC_SUPPORT
                        if (id->idIsDspReloc())
                        {
                            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                        }
#endif
                    }
                }
            }
            else
            {
                // The address is "[{2/4/8} * rgx + icon]"
                regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
                          insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);

                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, regByte);

                // Special case: jump through a jump table
                if (ins == INS_i_jmp)
                    dsp += (size_t)emitConsBlock;

                dst += emitOutputLong(dst, dsp);
#ifdef RELOC_SUPPORT
                if (id->idIsDspReloc())
                {
                    emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                }
#endif
            }
        }
        else
        {
            // The address is "[reg+rgx+dsp]"
            regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
                      insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);

            if (dspIsZero && reg != REG_EBP)
            {
                // This is "[reg+rgx]"
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, regByte);
            }
            else
            {
                // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
                if (dspInByte)
                {
                    dst += emitOutputWord(dst, code | 0x4400);
                    dst += emitOutputByte(dst, regByte);
                    dst += emitOutputByte(dst, dsp);
                }
                else
                {
                    dst += emitOutputWord(dst, code | 0x8400);
                    dst += emitOutputByte(dst, regByte);
                    dst += emitOutputLong(dst, dsp);

#ifdef RELOC_SUPPORT
                    if (id->idIsDspReloc())
                    {
                        emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
                    }
#endif
                }
            }
        }
    }

    // Now generate the constant value, if present
    if (addc)
    {
        ssize_t cval = addc->cnsVal;

#ifdef _TARGET_AMD64_
        // all these opcodes only take a sign-extended 4-byte immediate
        noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
#endif

        switch (opsz)
        {
        case 0:
        case 4:
        case 8:
            dst += emitOutputLong(dst, cval);
            break;
        case 2:
            dst += emitOutputWord(dst, cval);
            break;
        case 1:
            dst += emitOutputByte(dst, cval);
            break;

        default:
            assert(!"unexpected operand size");
        }

#ifdef RELOC_SUPPORT
        if (addc->cnsReloc)
        {
            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
            assert(opsz == 4);
        }
#endif
    }

DONE:

    // Does this instruction operate on a GC ref value?
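    // If it does, keep the emitter's GC liveness info in sync: e.g. an IF_RWR_ARD load
    // of a gcref makes idReg1 live as a GC pointer at this code offset
    // (emitGCregLiveUpd), while writes of non-GC values kill the target register
    // (emitGCregDeadUpd) in the else path below.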
if (id->idGCref())
{
    switch (id->idInsFmt())
    {
    case IF_ARD:
    case IF_AWR:
    case IF_ARW:
        break;

    case IF_RRD_ARD:
        break;

    case IF_RWR_ARD:
        emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
        break;

    case IF_RRW_ARD:
        assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
        regMaskTP regMask;
        regMask = genRegMask(id->idReg1());

        // VSW 335101
        // Either id->idReg1(), id->idAddr()->iiaAddrMode.amBaseReg, or id->idAddr()->iiaAddrMode.amIndxReg
        // could be a BYREF.
        // For example in the following case:
        //     mov EDX, bword ptr [EBP-78H]    ; EDX becomes BYREF after this instr.
        //     add EAX, bword ptr [EDX+8]      ; It is the EDX that's causing id->idGCref to be GCT_BYREF.
        //                                     ; EAX becomes BYREF after this instr.
        // DD 273707
        //     add EDX, bword ptr [036464E0H]  ; int + static field (technically a GCREF)=BYREF
        regMaskTP baseRegMask;
        if (reg == REG_NA)
        {
            baseRegMask = RBM_NONE;
        }
        else
        {
            baseRegMask = genRegMask(reg);
        }
        regMaskTP indexRegMask;
        if (rgx == REG_NA)
        {
            indexRegMask = RBM_NONE;
        }
        else
        {
            indexRegMask = genRegMask(rgx);
        }

        // r1 could have been a GCREF as GCREF + int=BYREF
        //                            or BYREF+/-int=BYREF
        assert(((reg == REG_NA) && (rgx == REG_NA) && (ins == INS_add || ins == INS_sub)) ||
               (((regMask | baseRegMask | indexRegMask) & emitThisGCrefRegs) && (ins == INS_add)) ||
               (((regMask | baseRegMask | indexRegMask) & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
        // Mark it as holding a GCT_BYREF
        emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
        break;

    case IF_ARD_RRD:
    case IF_AWR_RRD:
        break;

    case IF_ARD_CNS:
    case IF_AWR_CNS:
        break;

    case IF_ARW_RRD:
    case IF_ARW_CNS:
        assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
        break;

    default:
#ifdef DEBUG
        emitDispIns(id, false, false, false);
#endif
        assert(!"unexpected GC ref instruction format");
    }

    // mul can never produce a GC ref
    assert(!instrIs3opImul(ins));
    assert(ins != INS_mulEAX && ins != INS_imulEAX);
}
else
{
    if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
    {
    }
    else
    {
        switch (id->idInsFmt())
        {
        case IF_RWR_ARD:
            emitGCregDeadUpd(id->idReg1(), dst);
            break;
        default:
            break;
        }

        if (ins == INS_mulEAX || ins == INS_imulEAX)
        {
            emitGCregDeadUpd(REG_EAX, dst);
            emitGCregDeadUpd(REG_EDX, dst);
        }

        // For the three operand imul instruction the target register
        // is encoded in the opcode
        if (instrIs3opImul(ins))
        {
            regNumber tgtReg = inst3opImulReg(ins);
            emitGCregDeadUpd(tgtReg, dst);
        }
    }
}

return dst;
}

/*****************************************************************************
 *
 *  Output an instruction involving a stack frame value.
 */

BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
{
    int  adr;
    int  dsp;
    bool EBPbased;
    bool dspInByte;
    bool dspIsZero;

    instruction ins  = id->idIns();
    emitAttr    size = id->idOpSize();
    size_t      opsz = EA_SIZE_IN_BYTES(size);

    assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);

    // Is there a large constant operand?
    if (addc && (size > EA_1BYTE))
    {
        ssize_t cval = addc->cnsVal;

        // Does the constant fit in a byte?
        if ((signed char)cval == cval &&
#ifdef RELOC_SUPPORT
            addc->cnsReloc == false &&
#endif
            ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_SRW_SHF)
                code |= 2;

            opsz = 1;
        }
    }

    // Add VEX prefix if required.
    // There are some callers who already add VEX prefix and call this routine.
    // Therefore, add a VEX prefix only if one is not already present.
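    // (A hedged sketch of what the added prefix looks like: under AVX, a scalar op such
    // as "vaddss xmm1, xmm2, xmm3" with the 3-byte VEX prefix this emitter always emits
    // comes out as C4 E1 6A 58 CB -- C4, then RXB.m-mmmm = E1 (implied 0F), then
    // W.vvvv.L.pp = 6A (vvvv = ~2 = 1101, pp = 10 for the F3 prefix), opcode 58,
    // ModRM CB. The exact bytes naturally depend on the registers involved.)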
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // Compute the REX prefix
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    // Special case emitting AVX instructions
    if (Is4ByteAVXInstruction(ins))
    {
        size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Emit last opcode byte
        assert((code & 0xFF) == 0);
        dst += emitOutputByte(dst, (code >> 8) & 0xFF);
        code = regcode;
    }
    // Is this a 'big' opcode?
    else if (code & 0xFF000000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest word of the opcode
        // We need to check again because in case of AVX instructions the leading
        // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
        if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
            code &= 0x0000FFFF;
        }
    }
    else if (code & 0x00FF0000)
    {
        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        // Output the highest byte of the opcode.
        // We need to check again because in case of AVX instructions the leading
        // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
        if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        // Use the large version if this is not a byte
        if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) &&
            !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
            code |= 0x1;
    }
    else if (CodeGen::instIsFP(ins))
    {
        assert(size == EA_4BYTE || size == EA_8BYTE);

        if (size == EA_8BYTE)
            code += 4;
    }
    else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
    {
        // Is the operand size larger than a byte?
        switch (size)
        {
        case EA_1BYTE:
            break;

        case EA_2BYTE:
            // Output a size prefix for a 16-bit operand
            dst += emitOutputByte(dst, 0x66);
            __fallthrough;

        case EA_4BYTE:
#ifdef _TARGET_AMD64_
        case EA_8BYTE:
#endif // _TARGET_AMD64_
            /* Set the 'w' size bit to indicate 32-bit operation
             * Note that incrementing "code" for INS_call (0xFF) would
             * overflow, whereas setting the lower bit to 1 just works out */
            code |= 0x01;
            break;

#ifdef _TARGET_X86_
        case EA_8BYTE:
            // Double operand - set the appropriate bit.
            // I don't know what a legitimate reason to end up in this case would be
            // considering that FP is taken care of above...
            // what is an instruction that takes a double which is not covered by the
            // above instIsFP? Of the list in instrsxarch, only INS_fprem
            code |= 0x04;
            NO_WAY("bad 8 byte op");
            break;
#endif // _TARGET_X86_

        default:
            NO_WAY("unexpected size");
            break;
        }
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Figure out the variable's frame position
    int varNum = id->idAddr()->iiaLclVar.lvaVarNum();

    adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
    dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();

    dspInByte = ((signed char)dsp == (int)dsp);
    dspIsZero = (dsp == 0);

#ifdef RELOC_SUPPORT
    // for stack variables the dsp should never be a reloc
    assert(id->idIsDspReloc() == 0);
#endif

    if (EBPbased)
    {
        // EBP-based variable: does the offset fit in a byte?
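        // (e.g., for illustration: "mov eax, [ebp-8]" encodes as 8B 45 F8 with a disp8,
        //  while "mov eax, [ebp-100H]" needs 8B 85 00 FF FF FF with a full disp32)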
if (Is4ByteAVXInstruction(ins)) { if (dspInByte) { dst += emitOutputByte(dst, code | 0x45); dst += emitOutputByte(dst, dsp); } else { dst += emitOutputByte(dst, code | 0x85); dst += emitOutputLong(dst, dsp); } } else { if (dspInByte) { dst += emitOutputWord(dst, code | 0x4500); dst += emitOutputByte(dst, dsp); } else { dst += emitOutputWord(dst, code | 0x8500); dst += emitOutputLong(dst, dsp); } } } else { #if !FEATURE_FIXED_OUT_ARGS // Adjust the offset by the amount currently pushed on the CPU stack dsp += emitCurStackLvl; #endif dspInByte = ((signed char)dsp == (int)dsp); dspIsZero = (dsp == 0); // Does the offset fit in a byte? if (Is4ByteAVXInstruction(ins)) { if (dspInByte) { if (dspIsZero) { dst += emitOutputByte(dst, code | 0x04); dst += emitOutputByte(dst, 0x24); } else { dst += emitOutputByte(dst, code | 0x44); dst += emitOutputByte(dst, 0x24); dst += emitOutputByte(dst, dsp); } } else { dst += emitOutputByte(dst, code | 0x84); dst += emitOutputByte(dst, 0x24); dst += emitOutputLong(dst, dsp); } } else { if (dspInByte) { if (dspIsZero) { dst += emitOutputWord(dst, code | 0x0400); dst += emitOutputByte(dst, 0x24); } else { dst += emitOutputWord(dst, code | 0x4400); dst += emitOutputByte(dst, 0x24); dst += emitOutputByte(dst, dsp); } } else { dst += emitOutputWord(dst, code | 0x8400); dst += emitOutputByte(dst, 0x24); dst += emitOutputLong(dst, dsp); } } } // Now generate the constant value, if present if (addc) { ssize_t cval = addc->cnsVal; #ifdef _TARGET_AMD64_ // all these opcodes only take a sign-extended 4-byte immediate noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); #endif switch (opsz) { case 0: case 4: case 8: dst += emitOutputLong(dst, cval); break; case 2: dst += emitOutputWord(dst, cval); break; case 1: dst += emitOutputByte(dst, cval); break; default: assert(!"unexpected operand size"); } #ifdef RELOC_SUPPORT if (addc->cnsReloc) { emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); assert(opsz == 4); } #endif } // Does this instruction operate on a GC ref value? 
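    // (For illustration: a store such as "mov gword ptr [ebp-10H], esi" -- format IF_SWR
    // with a GC type -- can be the birth point of a GC-live stack slot, which the IF_SWR
    // case below reports via emitGCvarLiveUpd.)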
    if (id->idGCref())
    {
        // Factor in the sub-variable offset
        adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);

        switch (id->idInsFmt())
        {
        case IF_SRD:
            // Read  stack                    -- no change
            break;

        case IF_SWR: // Stack Write (So we need to update GC live for stack var)
            // Write stack                    -- GC var may be born
            emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
            break;

        case IF_SRD_CNS:
            // Read  stack                    -- no change
            break;

        case IF_SWR_CNS:
            // Write stack                    -- no change
            break;

        case IF_SRD_RRD:
        case IF_RRD_SRD:
            // Read  stack   , read  register -- no change
            break;

        case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
            // Read  stack   , write register -- GC reg may be born
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
            // Read  register, write stack    -- GC var may be born
            emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
            break;

        case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
            // reg could have been a GCREF as GCREF + int=BYREF
            //                            or BYREF+/-int=BYREF
            assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_SRW_CNS:
        case IF_SRW_RRD:
        // += -= of a byref, no change
        case IF_SRW:
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
        }
        else
        {
            switch (id->idInsFmt())
            {
            case IF_RWR_SRD: // Register Write, Stack Read
            case IF_RRW_SRD: // Register Read/Write, Stack Read
                emitGCregDeadUpd(id->idReg1(), dst);
                break;
            default:
                break;
            }

            if (ins == INS_mulEAX || ins == INS_imulEAX)
            {
                emitGCregDeadUpd(REG_EAX, dst);
                emitGCregDeadUpd(REG_EDX, dst);
            }

            // For the three operand imul instruction the target register
            // is encoded in the opcode
            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
        }
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output an instruction with a static data member (class variable).
 */

BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
{
    BYTE*                addr;
    CORINFO_FIELD_HANDLE fldh;
    ssize_t              offs;
    int                  doff;

    emitAttr    size      = id->idOpSize();
    size_t      opsz      = EA_SIZE_IN_BYTES(size);
    instruction ins       = id->idIns();
    bool        isMoffset = false;

    // Get hold of the field handle and offset
    fldh = id->idAddr()->iiaFieldHnd;
    offs = emitGetInsDsp(id);

    // Special case: mov reg, fs:[ddd]
    if (fldh == FLD_GLOBAL_FS)
    {
        dst += emitOutputByte(dst, 0x64);
    }

    // Compute VEX prefix
    // Some of its callers already add VEX prefix and then call this routine.
    // Therefore, add a VEX prefix only if it is not already present.
    code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);

    // Compute the REX prefix
    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    // Is there a large constant operand?
    if (addc && (size > EA_1BYTE))
    {
        ssize_t cval = addc->cnsVal;
        // Does the constant fit in a byte?
        if ((signed char)cval == cval &&
#ifdef RELOC_SUPPORT
            addc->cnsReloc == false &&
#endif
            ins != INS_mov && ins != INS_test)
        {
            if (id->idInsFmt() != IF_MRW_SHF)
                code |= 2;

            opsz = 1;
        }
    }
#ifdef _TARGET_X86_
    else
    {
        // Special case: "mov eax, [addr]" and "mov [addr], eax"
        // Amd64: this is one case where addr can be 64-bit in size.
This is // currently unused or not enabled on amd64 as it always uses RIP // relative addressing which results in smaller instruction size. if (ins == INS_mov && id->idReg1() == REG_EAX) { switch (id->idInsFmt()) { case IF_RWR_MRD: assert((unsigned)code == (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); code &= ~((size_t)0xFFFFFFFF); code |= 0xA0; isMoffset = true; break; case IF_MWR_RRD: assert((unsigned)code == (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500)); code &= ~((size_t)0xFFFFFFFF); code |= 0xA2; isMoffset = true; break; default: break; } } } #endif //_TARGET_X86_ // Special case emitting AVX instructions if (Is4ByteAVXInstruction(ins)) { size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Emit last opcode byte // TODO-XArch-CQ: Right now support 4-byte opcode instructions only assert((code & 0xFF) == 0); dst += emitOutputByte(dst, (code >> 8) & 0xFF); code = 0; // Emit Mod,R/M byte dst += emitOutputByte(dst, regcode | 0x05); } // Is this a 'big' opcode? else if (code & 0xFF000000) { // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode. // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix. if (code & 0xFF000000) { dst += emitOutputWord(dst, code >> 16); } code &= 0x0000FFFF; } else if(code & 0x00FF0000) { // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Check again as VEX prefix would have encoded leading opcode byte if (code & 0x00FF0000) { dst += emitOutputByte(dst, code >> 16); code &= 0x0000FFFF; } if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd || insIsCMOV(ins)) && size != EA_1BYTE) { // movsx and movzx are 'big' opcodes but also have the 'w' bit code++; } } else if (CodeGen::instIsFP(ins)) { assert(size == EA_4BYTE || size == EA_8BYTE); if (size == EA_8BYTE) code += 4; } else { // Is the operand size larger than a byte? switch (size) { case EA_1BYTE: break; case EA_2BYTE: // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); __fallthrough; case EA_4BYTE: #ifdef _TARGET_AMD64_ case EA_8BYTE: #endif // Set the 'w' bit to get the large version code |= 0x1; break; #ifdef _TARGET_X86_ case EA_8BYTE: // Double operand - set the appropriate bit code |= 0x04; break; #endif // _TARGET_X86_ default: assert(!"unexpected size"); } } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); if (code) { if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset) dst += emitOutputByte(dst, code); else dst += emitOutputWord(dst, code); } // Do we have a constant or a static data member? doff = Compiler::eeGetJitDataOffs(fldh); if (doff >= 0) { addr = emitConsBlock + doff; int byteSize = EA_SIZE_IN_BYTES(size); #ifdef _TARGET_AMD64_ // this instruction has a fixed size (4) src. if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss) byteSize = 4; // This has a fixed size (8) source. if (ins == INS_vbroadcastsd) byteSize = 8; #endif // Check that the offset is properly aligned (i.e. 
the ddd in [ddd]) assert((emitChkAlign==false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0)); } else { // Special case: mov reg, fs:[ddd] or mov reg, [ddd] if (jitStaticFldIsGlobAddr(fldh)) { addr = nullptr; } else { addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr); if (addr == nullptr) { NO_WAY("could not obtain address of static field"); } } } BYTE* target = (addr + offs); if (!isMoffset) { INT32 addlDelta = 0; if (addc) { // It is of the form "ins [disp], immed" // For emitting relocation, we also need to take into account of the // additional bytes of code emitted for immed val. ssize_t cval = addc->cnsVal; #ifdef _TARGET_AMD64_ // all these opcodes only take a sign-extended 4-byte immediate noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); #else noway_assert(opsz <= 4); #endif switch (opsz) { case 0: case 4: case 8: addlDelta = -4; break; case 2: addlDelta = -2; break; case 1: addlDelta = -1; break; default: assert(!"unexpected operand size"); unreached(); } } #ifdef _TARGET_AMD64_ // All static field and data section constant accesses should be marked as relocatable noway_assert(id->idIsDspReloc()); dst += emitOutputLong(dst, 0); #else //_TARGET_X86_ dst += emitOutputLong(dst, (int)target); #endif //_TARGET_X86_ #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta); } #endif } else { #ifdef _TARGET_AMD64_ // This code path should never be hit on amd64 since it always uses RIP relative addressing. // In future if ever there is a need to enable this special case, also enable the logic // that sets isMoffset to true on amd64. unreached(); #else //_TARGET_X86_ dst += emitOutputSizeT(dst, (ssize_t)target); #ifdef RELOC_SUPPORT if (id->idIsDspReloc()) { emitRecordRelocation((void*)(dst - sizeof(void*)), target, IMAGE_REL_BASED_MOFFSET); } #endif #endif //_TARGET_X86_ } // Now generate the constant value, if present if (addc) { ssize_t cval = addc->cnsVal; #ifdef _TARGET_AMD64_ // all these opcodes only take a sign-extended 4-byte immediate noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc)); #endif switch (opsz) { case 0: case 4: case 8: dst += emitOutputLong(dst, cval); break; case 2: dst += emitOutputWord(dst, cval); break; case 1: dst += emitOutputByte(dst, cval); break; default: assert(!"unexpected operand size"); } #ifdef RELOC_SUPPORT if (addc->cnsReloc) { emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW); assert(opsz == 4); } #endif } // Does this instruction operate on a GC ref value? 
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
        case IF_MRD:
        case IF_MRW:
        case IF_MWR:
            break;

        case IF_RRD_MRD:
            break;

        case IF_RWR_MRD:
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_MRD_RRD:
        case IF_MWR_RRD:
        case IF_MRW_RRD:
            break;

        case IF_MRD_CNS:
        case IF_MWR_CNS:
        case IF_MRW_CNS:
            break;

        case IF_RRW_MRD:
            assert(id->idGCref() == GCT_BYREF);
            assert(ins == INS_add || ins == INS_sub);

            // Mark it as holding a GCT_BYREF
            emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
        }
        else
        {
            switch (id->idInsFmt())
            {
            case IF_RWR_MRD:
                emitGCregDeadUpd(id->idReg1(), dst);
                break;
            default:
                break;
            }

            if (ins == INS_mulEAX || ins == INS_imulEAX)
            {
                emitGCregDeadUpd(REG_EAX, dst);
                emitGCregDeadUpd(REG_EDX, dst);
            }

            // For the three operand imul instruction the target register
            // is encoded in the opcode
            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
        }
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output an instruction with one register operand.
 */

BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
{
    size_t code;

    instruction ins  = id->idIns();
    regNumber   reg  = id->idReg1();
    emitAttr    size = id->idOpSize();

    // We would like to update GC info correctly
    assert(!IsSSE2Instruction(ins));
    assert(!IsAVXInstruction(ins));

    // Get the 'base' opcode
    switch (ins)
    {
    case INS_inc:
    case INS_dec:

#ifdef _TARGET_AMD64_
        if (true)
#else
        if (size == EA_1BYTE)
#endif
        {
            assert(INS_inc_l == INS_inc + 1);
            assert(INS_dec_l == INS_dec + 1);

            // Can't use the compact form, use the long form
            ins = (instruction)(ins + 1);
            if (size == EA_2BYTE)
            {
                // Output a size prefix for a 16-bit operand
                dst += emitOutputByte(dst, 0x66);
            }

            code = insCodeRR(ins);
            if (size != EA_1BYTE)
            {
                // Set the 'w' bit to get the large version
                code |= 0x1;
            }

            if (TakesRexWPrefix(ins, size))
            {
                code = AddRexWPrefix(ins, code);
            }

            // Register...
unsigned regcode = insEncodeReg012(ins, reg, size, &code); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code | (regcode << 8)); } else { if (size == EA_2BYTE) { // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); } dst += emitOutputByte(dst, insCodeRR(ins ) | insEncodeReg012(ins, reg, size, NULL)); } break; case INS_pop: case INS_pop_hide: case INS_push: case INS_push_hide: assert(size == EA_PTRSIZE); code = insEncodeOpreg(ins, reg, size); assert(!TakesVexPrefix(ins)); assert(!TakesRexWPrefix(ins, size)); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); break; case INS_seto: case INS_setno: case INS_setb: case INS_setae: case INS_sete: case INS_setne: case INS_setbe: case INS_seta: case INS_sets: case INS_setns: case INS_setpe: case INS_setpo: case INS_setl: case INS_setge: case INS_setle: case INS_setg: assert(id->idGCref() == GCT_NONE); assert(size == EA_1BYTE); code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins)); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // We expect this to always be a 'big' opcode assert(code & 0x00FF0000); dst += emitOutputByte(dst, code >> 16); dst += emitOutputWord(dst, code & 0x0000FFFF); break; case INS_mulEAX: case INS_imulEAX: // Kill off any GC refs in EAX or EDX emitGCregDeadUpd(REG_EAX, dst); emitGCregDeadUpd(REG_EDX, dst); __fallthrough; default: assert(id->idGCref() == GCT_NONE); code = insEncodeMRreg(ins, reg, size, insCodeMR(ins)); if (size != EA_1BYTE) { // Set the 'w' bit to get the large version code |= 0x1; if (size == EA_2BYTE) { // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); } } code = AddVexPrefixIfNeeded(ins, code, size); if (TakesRexWPrefix(ins, size)) { code = AddRexWPrefix(ins, code); } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); break; } // Are we writing the register? if so then update the GC information switch (id->idInsFmt()) { case IF_RRD: break; case IF_RWR: if (id->idGCref()) emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); else emitGCregDeadUpd(id->idReg1(), dst); break; case IF_RRW: { #ifdef DEBUG regMaskTP regMask = genRegMask(reg); #endif if (id->idGCref()) { // The reg must currently be holding either a gcref or a byref // and the instruction must be inc or dec assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) && (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l)); assert(id->idGCref() == GCT_BYREF); // Mark it as holding a GCT_BYREF emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); } else { // Can't use RRW to trash a GC ref. It's OK for unverifiable code // to trash Byrefs. assert((emitThisGCrefRegs & regMask) == 0); } } break; default: #ifdef DEBUG emitDispIns(id, false, false, false); #endif assert(!"unexpected instruction format"); break; } return dst; } /***************************************************************************** * * Output an instruction with two register operands. 
 */

BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
{
    size_t code;

    instruction ins  = id->idIns();
    regNumber   reg1 = id->idReg1();
    regNumber   reg2 = id->idReg2();
    emitAttr    size = id->idOpSize();

    // Get the 'base' opcode
    code = insCodeRM(ins);
    code = AddVexPrefixIfNeeded(ins, code, size);
    if (IsSSE2Instruction(ins) || IsAVXInstruction(ins))
    {
        code = insEncodeRMreg(ins, code);

        if (TakesRexWPrefix(ins, size))
        {
            code = AddRexWPrefix(ins, code);
        }
    }
    else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
    {
        code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
#ifdef _TARGET_AMD64_
        assert((size < EA_4BYTE) || (insIsCMOV(ins)));
        if ((size == EA_8BYTE) || (ins == INS_movsx))
        {
            code = AddRexWPrefix(ins, code);
        }
    }
    else if (ins == INS_movsxd)
    {
        code = insEncodeRMreg(ins, code);
#endif // _TARGET_AMD64_
    }
    else
    {
        code = insEncodeMRreg(ins, insCodeMR(ins));

        if (ins != INS_test)
        {
            code |= 2;
        }

        switch (size)
        {
        case EA_1BYTE:
            noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
            noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
            break;

        case EA_2BYTE:
            // Output a size prefix for a 16-bit operand
            dst += emitOutputByte(dst, 0x66);
            __fallthrough;

        case EA_4BYTE:
            // Set the 'w' bit to get the large version
            code |= 0x1;
            break;

#ifdef _TARGET_AMD64_
        case EA_8BYTE:
            // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
            // Don't need to zero out the high bits explicitly
            if ((ins != INS_xor) || (reg1 != reg2))
            {
                code = AddRexWPrefix(ins, code);
            }

            // Set the 'w' bit to get the large version
            code |= 0x1;
            break;
#endif // _TARGET_AMD64_

        default:
            assert(!"unexpected size");
        }
    }

    unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
    regCode |= insEncodeReg012(ins, reg2, size, &code);

    // In case of AVX instructions that take 3 operands, we generally want to encode reg1
    // as first source. In this case, reg1 is both a source and a destination.
    // The exception is the "merge" 3-operand case, where we have a move instruction, such
    // as movss, and we want to merge the source with itself.
    //
    // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
    // now we use the single source as source1 and source2.
    if (IsThreeOperandBinaryAVXInstruction(ins))
    {
        // encode source/dest operand reg in 'vvvv' bits in 1's complement form
        code = insEncodeReg3456(ins, reg1, size, code);
    }
    else if (IsThreeOperandMoveAVXInstruction(ins))
    {
        // encode source operand reg in 'vvvv' bits in 1's complement form
        code = insEncodeReg3456(ins, reg2, size, code);
    }

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Is this a 'big' opcode?
    if (code & 0xFF000000)
    {
        // Output the highest word of the opcode
        dst += emitOutputWord(dst, code >> 16);
        code &= 0x0000FFFF;
    }
    else if (code & 0x00FF0000)
    {
        dst += emitOutputByte(dst, code >> 16);
        code &= 0x0000FFFF;
    }

    // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
    // encoding.  Otherwise, this is an instruction with a 4-byte encoding,
    // and the Mod/RM encoding needs to go in the 5th byte.
    // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
    // We probably need a different mechanism to identify the 4-byte encodings.
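    // (Sketch of the common register-register case: the Mod/RM byte is
    // 0xC0 | (reg345 << 3) | reg012, so e.g. "add ecx, edx" in the RM form is 03 CA,
    // where CA = C0 | (ecx=001)<<3 | (edx=010). The regCode computed above already has
    // the reg345 part shifted into place.)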
    if ((code & 0xFF) == 0x00)
    {
        // This case happens for AVX instructions only
        assert(IsAVXInstruction(ins));
        if ((code & 0xFF00) == 0xC000)
        {
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
        else
        {
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
    }
    else if ((code & 0xFF00) == 0xC000)
    {
        dst += emitOutputWord(dst, code | (regCode << 8));
    }
    else
    {
        dst += emitOutputWord(dst, code);
        dst += emitOutputByte(dst, (0xC0 | regCode));
    }

    // Does this instruction operate on a GC ref value?
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
        case IF_RRD_RRD:
            break;

        case IF_RWR_RRD:
            if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
            {
                // We're relocating "this" in the prolog
                assert(emitComp->lvaIsOriginalThisArg(0));
                assert(emitComp->lvaTable[0].lvRegister);
                assert(emitComp->lvaTable[0].lvRegNum == reg1);

                if (emitFullGCinfo)
                {
                    emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
                    break;
                }
                else
                {
                    /* If emitFullGCinfo==false, then we don't use any regPtrDsc's
                       and so we explicitly note the location of "this" in
                       GCEncode.cpp
                     */
                }
            }

            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_RRW_RRD:
            switch (id->idIns())
            {
            /*
                This must be one of the following cases:

                xor reg, reg        to assign NULL

                and r1 , r2         if (ptr1 && ptr2) ...
                or  r1 , r2         if (ptr1 || ptr2) ...

                add r1 , r2         to compute a normal byref
                sub r1 , r2         to compute a strange byref (VC only)
            */
            case INS_xor:
                assert(id->idReg1() == id->idReg2());
                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
                break;

            case INS_or:
            case INS_and:
                emitGCregDeadUpd(id->idReg1(), dst);
                break;

            case INS_add:
            case INS_sub:
                assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
                regMaskTP regMask;
                regMask = genRegMask(reg1) | genRegMask(reg2);

                // r1/r2 could have been a GCREF as GCREF + int=BYREF
                //                               or BYREF+/-int=BYREF
                assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
                       ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
                // Mark r1 as holding a byref
                emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
                break;

            default:
#ifdef DEBUG
                emitDispIns(id, false, false, false);
#endif
                assert(!"unexpected GC reg update instruction");
            }

            break;

        case IF_RRW_RRW:
            // This must be "xchg reg1, reg2"
            assert(id->idIns() == INS_xchg);

            // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
            // register pointer mask.
#ifndef LEGACY_BACKEND
            GCtype gc1, gc2;

            gc1 = emitRegGCtype(reg1);
            gc2 = emitRegGCtype(reg2);

            if (gc1 != gc2)
            {
                // Kill the GC-info about the GC registers

                if (needsGC(gc1))
                    emitGCregDeadUpd(reg1, dst);

                if (needsGC(gc2))
                    emitGCregDeadUpd(reg2, dst);

                // Now, swap the info

                if (needsGC(gc1))
                    emitGCregLiveUpd(gc1, reg2, dst);

                if (needsGC(gc2))
                    emitGCregLiveUpd(gc2, reg1, dst);
            }
#endif // !LEGACY_BACKEND
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }
    else
    {
        if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
        {
        }
        else
        {
            switch (id->idInsFmt())
            {
            case IF_RRD_CNS:
                // INS_mulEAX can not be used with any of these formats
                assert(ins != INS_mulEAX && ins != INS_imulEAX);

                // For the three operand imul instruction the target
                // register is encoded in the opcode
                if (instrIs3opImul(ins))
                {
                    regNumber tgtReg = inst3opImulReg(ins);
                    emitGCregDeadUpd(tgtReg, dst);
                }
                break;

            case IF_RWR_RRD:
            case IF_RRW_RRD:
                // INS_mov_xmm2i writes to reg2.
                if (ins == INS_mov_xmm2i)
                {
                    emitGCregDeadUpd(id->idReg2(), dst);
                }
                else
                {
                    emitGCregDeadUpd(id->idReg1(), dst);
                }
                break;

            default:
                break;
            }
        }
    }

    return dst;
}

#ifdef FEATURE_AVX_SUPPORT
BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
{
    size_t code;

    instruction ins = id->idIns();
    assert(IsAVXInstruction(ins));
    assert(IsThreeOperandAVXInstruction(ins));
    regNumber targetReg = id->idReg1();
    regNumber src1      = id->idReg2();
    regNumber src2      = id->idReg3();
    emitAttr  size      = id->idOpSize();

    code = insCodeRM(ins);
    code = AddVexPrefixIfNeeded(ins, code, size);
    code = insEncodeRMreg(ins, code);

    if (TakesRexWPrefix(ins, size))
    {
        code = AddRexWPrefix(ins, code);
    }

    unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
    regCode |= insEncodeReg012(ins, src2, size, &code);
    // encode source operand reg in 'vvvv' bits in 1's complement form
    code = insEncodeReg3456(ins, src1, size, code);

    // Output the REX prefix
    dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

    // Is this a 'big' opcode?
    if (code & 0xFF000000)
    {
        // Output the highest word of the opcode
        dst += emitOutputWord(dst, code >> 16);
        code &= 0x0000FFFF;
    }
    else if (code & 0x00FF0000)
    {
        dst += emitOutputByte(dst, code >> 16);
        code &= 0x0000FFFF;
    }

    // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
    // encoding.  Otherwise, this is an instruction with a 4-byte encoding,
    // and the Mod/RM encoding needs to go in the 5th byte.
    // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
    // We probably need a different mechanism to identify the 4-byte encodings.
    if ((code & 0xFF) == 0x00)
    {
        // This case happens for AVX instructions only
        assert(IsAVXInstruction(ins));
        if ((code & 0xFF00) == 0xC000)
        {
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
        else
        {
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
            dst += emitOutputByte(dst, (0xC0 | regCode));
        }
    }
    else if ((code & 0xFF00) == 0xC000)
    {
        dst += emitOutputWord(dst, code | (regCode << 8));
    }
    else
    {
        dst += emitOutputWord(dst, code);
        dst += emitOutputByte(dst, (0xC0 | regCode));
    }

    noway_assert(!id->idGCref());

    return dst;
}
#endif

/*****************************************************************************
 *
 *  Output an instruction with a register and constant operands.
 */

BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
{
    size_t      code;
    emitAttr    size      = id->idOpSize();
    instruction ins       = id->idIns();
    regNumber   reg       = id->idReg1();
    ssize_t     val       = emitGetInsSC(id);
    bool        valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);

#ifdef RELOC_SUPPORT
    if (id->idIsCnsReloc())
    {
        valInByte = false; // relocs can't be placed in a byte
    }
#endif

    noway_assert(emitVerifyEncodable(ins, size, reg));

#ifdef _TARGET_AMD64_
    if (IsSSEOrAVXInstruction(ins)) // Handle SSE2 instructions of the form "opcode reg, immed8"
    {
        assert(id->idGCref() == GCT_NONE);
        assert(valInByte);
        assert(ins == INS_psrldq || ins == INS_pslldq);

        // Get the 'base' opcode.
        code = insCodeMI(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);
        code = insEncodeMIreg(ins, reg, size, code);
        assert(code & 0x00FF0000);

        if (TakesVexPrefix(ins))
        {
            // The 'vvvv' bits encode the destination register, which for this case (RI)
            // is the same as the source.
            code = insEncodeReg3456(ins, reg, size, code);
        }

        // In case of psrldq
        //  Reg/Opcode = 3
        //  R/M = reg1
        //
        // In case of pslldq
        //  Reg/Opcode = 7
        //  R/M = reg1
        regNumber regOpcode = (regNumber)((ins == INS_psrldq) ?
3 : 7); unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8; // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) dst += emitOutputWord(dst, code >> 16); else if (code & 0xFF0000) dst += emitOutputByte(dst, code >> 16); dst += emitOutputWord(dst, code | regcode); dst += emitOutputByte(dst, val); return dst; } #endif //_TARGET_AMD64_ // The 'mov' opcode is special if (ins == INS_mov) { code = insCodeACC(ins); assert(code < 0x100); code |= 0x08; // Set the 'w' bit unsigned regcode = insEncodeReg012(ins, reg, size, &code); code |= regcode; // This is INS_mov and will not take VEX prefix assert(!TakesVexPrefix(ins)); if (TakesRexWPrefix(ins, size)) { code = AddRexWPrefix(ins, code); } dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); if (size == EA_4BYTE) { dst += emitOutputLong(dst, val); } #ifdef _TARGET_AMD64_ else { assert(size == EA_PTRSIZE); dst += emitOutputSizeT(dst, val); } #endif #ifdef RELOC_SUPPORT if (id->idIsCnsReloc()) { emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET); } #endif goto DONE; } // Decide which encoding is the shortest bool useSigned, useACC; if (reg == REG_EAX && !instrIs3opImul(ins)) { if (size == EA_1BYTE || (ins == INS_test)) { // For al, ACC encoding is always the smallest useSigned = false; useACC = true; } else { /* For ax/eax, we avoid ACC encoding for small constants as we * can emit the small constant and have it sign-extended. * For big constants, the ACC encoding is better as we can use * the 1 byte opcode */ if (valInByte) { // avoid using ACC encoding useSigned = true; useACC = false; } else { useSigned = false; useACC = true; } } } else { useACC = false; if (valInByte) useSigned = true; else useSigned = false; } // "test" has no 's' bit if (ins == INS_test) useSigned = false; // Get the 'base' opcode if (useACC) { assert(!useSigned); code = insCodeACC(ins); } else { assert(!useSigned || valInByte); // Some instructions (at least 'imul') do not have a // r/m, immed form, but do have a dstReg,srcReg,imm8 form. if (valInByte && useSigned && insNeedsRRIb(ins)) { code = insEncodeRRIb(ins, reg, size); } else { code = insCodeMI(ins); code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeMIreg(ins, reg, size, code); } } switch (size) { case EA_1BYTE: break; case EA_2BYTE: // Output a size prefix for a 16-bit operand dst += emitOutputByte(dst, 0x66); __fallthrough; case EA_4BYTE: // Set the 'w' bit to get the large version code |= 0x1; break; #ifdef _TARGET_AMD64_ case EA_8BYTE: /* Set the 'w' bit to get the large version */ /* and the REX.W bit to get the really large version */ code = AddRexWPrefix(ins, code); code |= 0x1; break; #endif default: assert(!"unexpected size"); } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); // Does the value fit in a sign-extended byte? // Important! Only set the 's' bit when we have a size larger than EA_1BYTE. // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode. if (useSigned && (size > EA_1BYTE)) { // We can just set the 's' bit, and issue an immediate byte code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte. dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, val); } else { // Can we use an accumulator (EAX) encoding? 
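        // (e.g., for illustration: "add eax, 1000H" can use the one-byte accumulator
        //  opcode, giving 05 00 10 00 00, whereas "add ecx, 1000H" costs a Mod/RM byte:
        //  81 C1 00 10 00 00)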
        if (useACC)
            dst += emitOutputByte(dst, code);
        else
            dst += emitOutputWord(dst, code);

        switch (size)
        {
        case EA_1BYTE:
            dst += emitOutputByte(dst, val);
            break;
        case EA_2BYTE:
            dst += emitOutputWord(dst, val);
            break;
        case EA_4BYTE:
            dst += emitOutputLong(dst, val);
            break;
#ifdef _TARGET_AMD64_
        case EA_8BYTE:
            dst += emitOutputLong(dst, val);
            break;
#endif // _TARGET_AMD64_
        default:
            break;
        }

#ifdef RELOC_SUPPORT
        if (id->idIsCnsReloc())
        {
            emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
            assert(size == EA_4BYTE);
        }
#endif
    }

DONE:

    // Does this instruction operate on a GC ref value?
    if (id->idGCref())
    {
        switch (id->idInsFmt())
        {
        case IF_RRD_CNS:
            break;

        case IF_RWR_CNS:
            emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
            break;

        case IF_RRW_CNS:
            assert(id->idGCref() == GCT_BYREF);

#ifdef DEBUG
            regMaskTP regMask;
            regMask = genRegMask(reg);
            // FIXNOW review the other places and relax the assert there too

            // The reg must currently be holding either a gcref or a byref
            // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
            if (emitThisGCrefRegs & regMask)
                assert(ins == INS_add);
            if (emitThisByrefRegs & regMask)
                assert(ins == INS_add || ins == INS_sub);
#endif
            // Mark it as holding a GCT_BYREF
            emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }

        // mul can never produce a GC ref
        assert(!instrIs3opImul(ins));
        assert(ins != INS_mulEAX && ins != INS_imulEAX);
    }
    else
    {
        switch (id->idInsFmt())
        {
        case IF_RRD_CNS:
            // INS_mulEAX can not be used with any of these formats
            assert(ins != INS_mulEAX && ins != INS_imulEAX);

            // For the three operand imul instruction the target
            // register is encoded in the opcode
            if (instrIs3opImul(ins))
            {
                regNumber tgtReg = inst3opImulReg(ins);
                emitGCregDeadUpd(tgtReg, dst);
            }
            break;

        case IF_RRW_CNS:
        case IF_RWR_CNS:
            assert(!instrIs3opImul(ins));
            emitGCregDeadUpd(id->idReg1(), dst);
            break;

        default:
#ifdef DEBUG
            emitDispIns(id, false, false, false);
#endif
            assert(!"unexpected GC ref instruction format");
        }
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output an instruction with a constant operand.
 */

BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
{
    size_t      code;
    instruction ins       = id->idIns();
    emitAttr    size      = id->idOpSize();
    ssize_t     val       = emitGetInsSC(id);
    bool        valInByte = ((signed char)val == val);

    // We would like to update GC info correctly
    assert(!IsSSE2Instruction(ins));
    assert(!IsAVXInstruction(ins));

#ifdef _TARGET_AMD64_
    // all these opcodes take a sign-extended 4-byte immediate, max
    noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
#endif

#ifdef RELOC_SUPPORT
    if (id->idIsCnsReloc())
    {
        valInByte = false; // relocs can't be placed in a byte

        // Of these instructions only the push instruction can have reloc
        assert(ins == INS_push || ins == INS_push_hide);
    }
#endif

    switch (ins)
    {
    case INS_jge:
        assert((val >= -128) && (val <= 127));
        dst += emitOutputByte(dst, insCode(ins));
        dst += emitOutputByte(dst, val);
        break;

    case INS_loop:
        assert((val >= -128) && (val <= 127));
        dst += emitOutputByte(dst, insCodeMI(ins));
        dst += emitOutputByte(dst, val);
        break;

    case INS_ret:
        assert(val);
        dst += emitOutputByte(dst, insCodeMI(ins));
        dst += emitOutputWord(dst, val);
        break;

    case INS_push_hide:
    case INS_push:
        code = insCodeMI(ins);

        // Does the operand fit in a byte?
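        // (e.g., for illustration: "push 8" fits the sign-extended imm8 form 6A 08,
        //  while "push 1000H" needs the full 68 00 10 00 00 encoding)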
        if (valInByte)
        {
            dst += emitOutputByte(dst, code | 2);
            dst += emitOutputByte(dst, val);
        }
        else
        {
            if (TakesRexWPrefix(ins, size))
            {
                code = AddRexWPrefix(ins, code);
                dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
            }

            dst += emitOutputByte(dst, code);
            dst += emitOutputLong(dst, val);
#ifdef RELOC_SUPPORT
            if (id->idIsCnsReloc())
            {
                emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
            }
#endif
        }

        // Did we push a GC ref value?
        if (id->idGCref())
        {
#ifdef DEBUG
            printf("UNDONE: record GCref push [cns]\n");
#endif
        }

        break;

    default:
        assert(!"unexpected instruction");
    }

    return dst;
}

/*****************************************************************************
 *
 *  Output a local jump instruction.
 *  This function also handles non-jumps that have jump-like characteristics, like
 *  RIP-relative LEA of a label that needs to get bound to an actual address and
 *  processed by branch shortening.
 */

BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
{
    unsigned srcOffs;
    unsigned dstOffs;
    ssize_t  distVal;

    instrDescJmp* id  = (instrDescJmp*)i;
    instruction   ins = id->idIns();
    bool          jmp;
    bool          relAddr = true; // does the instruction use relative-addressing?

    // SSE2 doesn't make any sense here
    assert(!IsSSE2Instruction(ins));
    assert(!IsAVXInstruction(ins));

    size_t ssz;
    size_t lsz;

    switch (ins)
    {
    default:
        ssz = JCC_SIZE_SMALL;
        lsz = JCC_SIZE_LARGE;
        jmp = true;
        break;

    case INS_jmp:
        ssz = JMP_SIZE_SMALL;
        lsz = JMP_SIZE_LARGE;
        jmp = true;
        break;

    case INS_call:
        ssz = lsz = CALL_INST_SIZE;
        jmp = false;
        break;

    case INS_push_hide:
    case INS_push:
        ssz = lsz = 5;
        jmp     = false;
        relAddr = false;
        break;

    case INS_mov:
    case INS_lea:
        ssz = lsz = id->idCodeSize();
        jmp     = false;
        relAddr = false;
        break;
    }

    // Figure out the distance to the target
    srcOffs = emitCurCodeOffs(dst);
    dstOffs = id->idAddr()->iiaIGlabel->igOffs;

    if (relAddr)
        distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
    else
        distVal = (ssize_t)emitOffsetToPtr(dstOffs);

    if (dstOffs <= srcOffs)
    {
        // This is a backward jump - distance is known at this point
#if DEBUG_EMIT
        if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
        {
            size_t blkOffs = id->idjIG->igOffs;

            if (INTERESTING_JUMP_NUM == 0)
                printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
            printf("[3] Jump  block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
            printf("[3] Jump        is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
            printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
        }
#endif

        // Can we use a short jump?
        if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
        {
            emitSetShortJump(id);
        }
    }
    else
    {
        // This is a forward jump - distance will be an upper limit
        emitFwdJumps = true;

        // The target offset will be closer by at least 'emitOffsAdj', but only if this
        // jump doesn't cross the hot-cold boundary.
        if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
        {
            dstOffs -= emitOffsAdj;
            distVal -= emitOffsAdj;
        }

        // Record the location of the jump for later patching
        id->idjOffs = dstOffs;

        // Are we overflowing the id->idjOffs bitfield?
if (id->idjOffs != dstOffs) IMPL_LIMITATION("Method is too large"); #if DEBUG_EMIT if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) { size_t blkOffs = id->idjIG->igOffs; if (INTERESTING_JUMP_NUM == 0) printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum); printf("[4] Jump block is at %08X\n" , blkOffs); printf("[4] Jump is at %08X\n" , srcOffs); printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs); } #endif // Can we use a short jump? if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS) { emitSetShortJump(id); } } // Adjust the offset to emit relative to the end of the instruction if (relAddr) distVal -= id->idjShort ? ssz : lsz; #ifdef DEBUG if (0&&emitComp->verbose) { size_t sz = id->idjShort ?ssz:lsz; int distValSize = id->idjShort ? 4 : 8; printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs)?"Fwd":"Bwd", emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs+sz, distValSize, dstOffs, distVal); } #endif // What size jump should we use? if (id->idjShort) { // Short jump assert(!id->idjKeepLong); assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false); assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL); assert(JMP_SIZE_SMALL == 2); assert(jmp); if (emitInstCodeSz(id) != JMP_SIZE_SMALL) { emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL; #ifdef DEBUG if (emitComp->verbose) printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id)); #endif } dst += emitOutputByte(dst, insCode(ins)); // For forward jumps, record the address of the distance value id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr; dst += emitOutputByte(dst, distVal); } else { size_t code; // Long jump if (jmp) { assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp); assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo ); assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb ); assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae); assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je ); assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne); assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe); assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja ); assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js ); assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns); assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe); assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo); assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl ); assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge); assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle); assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg ); code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp))); } else if (ins == INS_push || ins == INS_push_hide) { assert(insCodeMI(INS_push) == 0x68); code = 0x68; } else if (ins == INS_mov) { // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us insFormat tmpInsFmt = id->idInsFmt(); insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel; bool tmpDspReloc = id->idIsDspReloc(); id->idInsFmt(IF_SWR_CNS); id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar; id->idSetIsDspReloc(false); dst = emitOutputSV(dst, id, insCodeMI(ins)); // Restore id fields with original values id->idInsFmt(tmpInsFmt); id->idAddr()->iiaIGlabel = tmpIGlabel; id->idSetIsDspReloc(tmpDspReloc); code = 0xCC; } else if (ins == INS_lea) { // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us. // We basically are doing what emitIns_R_AI does. // TODO-XArch-Cleanup: revisit this. 
instrDescAmd idAmdStackLocal; instrDescAmd* idAmd = &idAmdStackLocal; *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields memset((BYTE*)idAmd + sizeof(instrDesc), 0, sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied idAmd->idInsFmt(IF_RWR_ARD); idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA; idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA; emitSetAmdDisp(idAmd, distVal); // set the displacement idAmd->idSetIsDspReloc(id->idIsDspReloc()); assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins)); idAmd->idCodeSize(sz); code = insCodeRM(ins); code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8); dst = emitOutputAM(dst, idAmd, code, nullptr); code = 0xCC; // For forward jumps, record the address of the distance value // Hard-coded 4 here because we already output the displacement, as the last thing. id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr; // We're done return dst; } else { code = 0xE8; } if (ins != INS_mov) { dst += emitOutputByte(dst, code); if (code & 0xFF00) dst += emitOutputByte(dst, code >> 8); } // For forward jumps, record the address of the distance value id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : NULL; dst += emitOutputLong(dst, distVal); #ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation if (emitComp->opts.compReloc) #endif { if (!relAddr) { emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW); } else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) { assert(id->idjKeepLong); emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32); } } } // Local calls kill all registers if (ins == INS_call && (emitThisGCrefRegs|emitThisByrefRegs)) emitGCregDeadUpdMask(emitThisGCrefRegs|emitThisByrefRegs, dst); return dst; } /***************************************************************************** * * Append the machine code corresponding to the given instruction descriptor * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' * is the instruction group that contains the instruction. Updates '*dp' to * point past the generated code, and returns the size of the instruction * descriptor in bytes. */ #ifdef _PREFAST_ #pragma warning(push) #pragma warning(disable:21000) // Suppress PREFast warning about overly large function #endif size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { assert(emitIssuing); BYTE* dst = *dp; size_t sz = sizeof(instrDesc); instruction ins = id->idIns(); unsigned char callInstrSize = 0; #ifdef DEBUG bool dspOffs = emitComp->opts.dspGCtbls; #endif // DEBUG emitAttr size = id->idOpSize(); assert(REG_NA == (int)REG_NA); assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal()); // What instruction format have we got? 
switch (id->idInsFmt()) { size_t code; size_t regcode; int args; CnsVal cnsVal; BYTE* addr; bool recCall; regMaskTP gcrefRegs; regMaskTP byrefRegs; /********************************************************************/ /* No operands */ /********************************************************************/ case IF_NONE: // the loop alignment pseudo instruction if (ins == INS_align) { sz = TINY_IDSC_SIZE; dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f); assert(((size_t)dst & 0x0f) == 0); break; } if (ins == INS_nop) { dst = emitOutputNOP(dst, id->idCodeSize()); break; } // the cdq instruction kills the EDX register implicitly if (ins == INS_cdq) emitGCregDeadUpd(REG_EDX, dst); __fallthrough; #if FEATURE_STACK_FP_X87 case IF_TRD: case IF_TWR: case IF_TRW: #endif // FEATURE_STACK_FP_X87 assert(id->idGCref() == GCT_NONE); code = insCodeMR(ins); #ifdef _TARGET_AMD64_ // Support only scalar AVX instructions and hence size is hard coded to 4-byte. code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE); if (ins == INS_cdq && TakesRexWPrefix(ins,id->idOpSize())) { code = AddRexWPrefix(ins, code); } dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); #endif // Is this a 'big' opcode? if (code & 0xFF000000) { // The high word and then the low word dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; dst += emitOutputWord(dst, code); } else if (code & 0x00FF0000) { // The high byte and then the low word dst += emitOutputByte(dst, code >> 16); code &= 0x0000FFFF; dst += emitOutputWord(dst, code); } else if (code & 0xFF00) { // The 2 byte opcode dst += emitOutputWord(dst, code); } else { // The 1 byte opcode dst += emitOutputByte(dst, code); } break; /********************************************************************/ /* Simple constant, local label, method */ /********************************************************************/ case IF_CNS: dst = emitOutputIV(dst, id); sz = emitSizeOfInsDsc(id); break; case IF_LABEL: case IF_RWR_LABEL: case IF_SWR_LABEL: assert(id->idGCref() == GCT_NONE); assert(id->idIsBound()); // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()? dst = emitOutputLJ(dst, id); sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp)); break; case IF_METHOD: case IF_METHPTR: // Assume we'll be recording this call recCall = true; // Get hold of the argument count and field Handle args = emitGetInsCDinfo(id); // Is this a "fat" call descriptor? if (id->idIsLargeCall()) { instrDescCGCA* idCall = (instrDescCGCA*) id; gcrefRegs = idCall->idcGcrefRegs; byrefRegs = idCall->idcByrefRegs; VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); sz = sizeof(instrDescCGCA); } else { assert(!id->idIsLargeDsp()); assert(!id->idIsLargeCns()); gcrefRegs = emitDecodeCallGCregs(id); byrefRegs = 0; VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); sz = sizeof(instrDesc); } addr = (BYTE*)id->idAddr()->iiaAddr; assert(addr != nullptr); // Some helpers don't get recorded in GC tables if (id->idIsNoGC()) { recCall = false; } // What kind of a call do we have here? 
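        // (For contrast, illustrative encodings: a direct call is "E8 rel32" -- one
        // opcode byte plus a 32-bit pc-relative displacement -- while the
        // indirect-through-pointer form below is "FF 15 disp32", i.e. call dword
        // ptr [addr], where 15 is Mod/RM mod=00 /2 rm=101; on AMD64 that rm encoding
        // is RIP-relative, hence the reloc.)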
        if (id->idInsFmt() == IF_METHPTR)
        {
            // This is call indirect via a method pointer

            code = insCodeMR(ins);
            if (ins == INS_i_jmp)
            {
                code |= 1;
            }

            if (id->idIsDspReloc())
            {
                dst += emitOutputWord(dst, code | 0x0500);
#ifdef _TARGET_AMD64_
                dst += emitOutputLong(dst, 0);
#else
                dst += emitOutputLong(dst, (int)addr);
#endif
                emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
            }
            else
            {
#ifdef _TARGET_X86_
                dst += emitOutputWord(dst, code | 0x0500);
#else //_TARGET_AMD64_
                // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
                // This addr mode should never be used while generating relocatable ngen code nor if
                // the addr can be encoded as a pc-relative address.
                noway_assert(!emitComp->opts.compReloc);
                noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
                noway_assert((int)addr == (ssize_t)addr);

                // This requires specifying a SIB byte after the ModRM byte.
                dst += emitOutputWord(dst, code | 0x0400);
                dst += emitOutputByte(dst, 0x25);
#endif //_TARGET_AMD64_
                dst += emitOutputLong(dst, (int)addr);
            }
            goto DONE_CALL;
        }

        // Else
        // This is call direct where we know the target, thus we can
        // use a direct call; the target to jump to is in iiaAddr.
        assert(id->idInsFmt() == IF_METHOD);

        // Output the call opcode followed by the target distance
        dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));

        ssize_t offset;
#ifdef _TARGET_AMD64_
        // All REL32 on Amd64 go through recordRelocation.  Here we will output zero to advance dst.
        offset = 0;
        assert(id->idIsDspReloc());
#else
        // Calculate PC relative displacement.
        // Although you think we should be using sizeof(void*), the x86 and x64 instruction set
        // only allow a 32-bit offset, so we correctly use sizeof(INT32)
        offset = addr - (dst + sizeof(INT32));
#endif

        dst += emitOutputLong(dst, offset);

#ifdef RELOC_SUPPORT
        if (id->idIsDspReloc())
        {
            emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
        }
#endif

    DONE_CALL:

        /* We update the GC info before the call as the variables cannot be
           used by the call. Killing variables before the call helps with
           boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
           If we ever track aliased variables (which could be used by the
           call), we would have to keep them alive past the call.
         */
        assert(FitsIn<unsigned char>(dst - *dp));
        callInstrSize = static_cast<unsigned char>(dst - *dp);
        emitUpdateLiveGCvars(GCvars, *dp);

        // If the method returns a GC ref, mark EAX appropriately
        if (id->idGCref() == GCT_GCREF)
            gcrefRegs |= RBM_EAX;
        else if (id->idGCref() == GCT_BYREF)
            byrefRegs |= RBM_EAX;

        // If the GC register set has changed, report the new set
        if (gcrefRegs != emitThisGCrefRegs)
            emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);

        if (byrefRegs != emitThisByrefRegs)
            emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);

        if (recCall || args)
        {
            // For callee-pop, all arguments will be popped after the call.
            // For caller-pop, any GC arguments will go dead after the call.

            assert(callInstrSize != 0);

            if (args >= 0)
                emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
            else
                emitStackKillArgs(dst, -args, callInstrSize);
        }

        // Do we need to record a call location for GC purposes?
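        // (Call sites are recorded so the GC can find the live GC refs at the return
        // address; when emitFullGCinfo is set, liveness is reported via the full
        // pointer-tracking tables instead, so no separate per-call-site record is
        // needed -- this is our reading of the condition below.)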
if (!emitFullGCinfo && recCall) { assert(callInstrSize != 0); emitRecordGCcall(dst, callInstrSize); } #ifdef DEBUG if (ins == INS_call) { emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig, (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); } #endif // DEBUG break; /********************************************************************/ /* One register operand */ /********************************************************************/ case IF_RRD: case IF_RWR: case IF_RRW: dst = emitOutputR(dst, id); sz = TINY_IDSC_SIZE; break; /********************************************************************/ /* Register and register/constant */ /********************************************************************/ case IF_RRW_SHF: code = insCodeMR(ins); // Emit the VEX prefix if it exists code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeMRreg(ins, id->idReg1(), size, code); // set the W bit if (size != EA_1BYTE) { code |= 1; } // Emit the REX prefix if it exists if (TakesRexWPrefix(ins, size)) { code = AddRexWPrefix(ins, code); } // Output a size prefix for a 16-bit operand if (size == EA_2BYTE) { dst += emitOutputByte(dst, 0x66); } dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, emitGetInsSC(id)); sz = emitSizeOfInsDsc(id); break; case IF_RRD_RRD: case IF_RWR_RRD: case IF_RRW_RRD: case IF_RRW_RRW: dst = emitOutputRR(dst, id); sz = TINY_IDSC_SIZE; break; case IF_RRD_CNS: case IF_RWR_CNS: case IF_RRW_CNS: dst = emitOutputRI(dst, id); sz = emitSizeOfInsDsc(id); break; #ifdef FEATURE_AVX_SUPPORT case IF_RWR_RRD_RRD: dst = emitOutputRRR(dst, id); sz = emitSizeOfInsDsc(id); break; #endif case IF_RRW_RRW_CNS: assert(id->idGCref() == GCT_NONE); // Get the 'base' opcode (it's a big one) // Also, determine which operand goes where in the ModRM byte. regNumber mReg; regNumber rReg; // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128) if (hasCodeMR(ins)) { code = insCodeMR(ins); // Emit the VEX prefix if it exists code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeMRreg(ins, code); mReg = id->idReg1(); rReg = id->idReg2(); } else { code = insCodeRM(ins); // Emit the VEX prefix if it exists code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeRMreg(ins, code); mReg = id->idReg2(); rReg = id->idReg1(); } assert(code & 0x00FF0000); #ifdef FEATURE_AVX_SUPPORT if (TakesVexPrefix(ins)) { if (IsThreeOperandBinaryAVXInstruction(ins)) { // Encode source/dest operand reg in 'vvvv' bits in 1's complement form // This code will have to change when we support 3 operands. // For now, we always overload this source with the destination (always reg1). // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination, // e.g. pslldq, when/if we support those instructions with 2 registers.) // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination) code = insEncodeReg3456(ins, id->idReg1(), size, code); } else if (IsThreeOperandMoveAVXInstruction(ins)) { // This is a "merge" move instruction. 
    case IF_RRD_RRD:
    case IF_RWR_RRD:
    case IF_RRW_RRD:
    case IF_RRW_RRW:
        dst = emitOutputRR(dst, id);
        sz = TINY_IDSC_SIZE;
        break;

    case IF_RRD_CNS:
    case IF_RWR_CNS:
    case IF_RRW_CNS:
        dst = emitOutputRI(dst, id);
        sz = emitSizeOfInsDsc(id);
        break;

#ifdef FEATURE_AVX_SUPPORT
    case IF_RWR_RRD_RRD:
        dst = emitOutputRRR(dst, id);
        sz = emitSizeOfInsDsc(id);
        break;
#endif

    case IF_RRW_RRW_CNS:
        assert(id->idGCref() == GCT_NONE);

        // Get the 'base' opcode (it's a big one)
        // Also, determine which operand goes where in the ModRM byte.
        regNumber mReg;
        regNumber rReg;
        // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
        if (hasCodeMR(ins))
        {
            code = insCodeMR(ins);
            // Emit the VEX prefix if it exists
            code = AddVexPrefixIfNeeded(ins, code, size);
            code = insEncodeMRreg(ins, code);
            mReg = id->idReg1();
            rReg = id->idReg2();
        }
        else
        {
            code = insCodeRM(ins);
            // Emit the VEX prefix if it exists
            code = AddVexPrefixIfNeeded(ins, code, size);
            code = insEncodeRMreg(ins, code);
            mReg = id->idReg2();
            rReg = id->idReg1();
        }
        assert(code & 0x00FF0000);

#ifdef FEATURE_AVX_SUPPORT
        if (TakesVexPrefix(ins))
        {
            if (IsThreeOperandBinaryAVXInstruction(ins))
            {
                // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
                // This code will have to change when we support 3 operands.
                // For now, we always overload this source with the destination (always reg1).
                // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
                // e.g. pslldq, when/if we support those instructions with 2 registers.)
                // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
                code = insEncodeReg3456(ins, id->idReg1(), size, code);
            }
            else if (IsThreeOperandMoveAVXInstruction(ins))
            {
                // This is a "merge" move instruction.
                // Encode source operand reg in 'vvvv' bits in 1's complement form
                code = insEncodeReg3456(ins, id->idReg2(), size, code);
            }
        }
#endif // FEATURE_AVX_SUPPORT

        regcode = (insEncodeReg345(ins, rReg, size, &code) |
                   insEncodeReg012(ins, mReg, size, &code)) << 8;

        // Output the REX prefix
        dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);

        if (UseAVX() && Is4ByteAVXInstruction(ins))
        {
            // We just need to output the last byte of the opcode.
            assert((code & 0xFF) == 0);
            assert((code & 0xFF00) != 0xC000);
            dst += emitOutputByte(dst, (code >> 8) & 0xFF);
            code = 0;
        }
        else if (code & 0xFF000000)
        {
            dst += emitOutputWord(dst, code >> 16);
            code &= 0x0000FFFF;
        }
        else if (code & 0x00FF0000)
        {
            dst += emitOutputByte(dst, code >> 16);
            code &= 0x0000FFFF;
        }

        // Note that regcode is shifted by 8-bits above to align with RM byte.
        if (code != 0)
        {
            assert((code & 0xFF00) == 0xC000);
            dst += emitOutputWord(dst, code | regcode);
        }
        else
        {
            // This case occurs for AVX instructions.
            // Note that regcode is left shifted by 8-bits.
            assert(Is4ByteAVXInstruction(ins));
            dst += emitOutputByte(dst, 0xC0 | (regcode >> 8));
        }

        dst += emitOutputByte(dst, emitGetInsSC(id));
        sz = emitSizeOfInsDsc(id);
        break;
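        // Illustrative note for the 'vvvv' handling above: when the caller requests
        // the two-operand form of a three-operand AVX binary op, e.g.
        // "shufps xmm1, xmm2, imm8", the destination (reg1, here xmm1) is duplicated
        // into the VEX 'vvvv' field so the encoded instruction is effectively
        // "vshufps xmm1, xmm1, xmm2, imm8". (Example registers are made up.)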
    /********************************************************************/
    /*                      Address mode operand                        */
    /********************************************************************/

    case IF_ARD:
    case IF_AWR:
    case IF_ARW:

#if FEATURE_STACK_FP_X87
    case IF_TRD_ARD:
    case IF_TWR_ARD:
    case IF_TRW_ARD:
    // case IF_ARD_TRD:
    case IF_AWR_TRD:
    // case IF_ARW_TRD:
#endif // FEATURE_STACK_FP_X87

        dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);

        switch (ins)
        {
        case INS_call:

        IND_CALL:
            // Get hold of the argument count and method handle
            args = emitGetInsCIargs(id);

            // Is this a "fat" call descriptor?
            if (id->idIsLargeCall())
            {
                instrDescCGCA* idCall = (instrDescCGCA*)id;

                gcrefRegs = idCall->idcGcrefRegs;
                byrefRegs = idCall->idcByrefRegs;
                VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
                sz = sizeof(instrDescCGCA);
            }
            else
            {
                assert(!id->idIsLargeDsp());
                assert(!id->idIsLargeCns());

                gcrefRegs = emitDecodeCallGCregs(id);
                byrefRegs = 0;
                VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
                sz = sizeof(instrDesc);
            }

            recCall = true;

            goto DONE_CALL;

        default:
            sz = emitSizeOfInsDsc(id);
            break;
        }
        break;

    case IF_RRD_ARD:
    case IF_RWR_ARD:
    case IF_RRW_ARD:
        code = insCodeRM(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);
        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputAM(dst, id, code | regcode);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_ARD_RRD:
    case IF_AWR_RRD:
    case IF_ARW_RRD:
        code = insCodeMR(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);
        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputAM(dst, id, code | regcode);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_ARD_CNS:
    case IF_AWR_CNS:
    case IF_ARW_CNS:
        emitGetInsAmdCns(id, &cnsVal);
        dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_ARW_SHF:
        emitGetInsAmdCns(id, &cnsVal);
        dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    /********************************************************************/
    /*                      Stack-based operand                         */
    /********************************************************************/

    case IF_SRD:
    case IF_SWR:
    case IF_SRW:

#if FEATURE_STACK_FP_X87
    case IF_TRD_SRD:
    case IF_TWR_SRD:
    case IF_TRW_SRD:
    // case IF_SRD_TRD:
    case IF_SWR_TRD:
    // case IF_SRW_TRD:
#endif // FEATURE_STACK_FP_X87

        assert(ins != INS_pop_hide);
        if (ins == INS_pop)
        {
            // The offset in "pop [ESP+xxx]" is relative to the new ESP value

#if !FEATURE_FIXED_OUT_ARGS
            emitCurStackLvl -= sizeof(int);
#endif
            dst = emitOutputSV(dst, id, insCodeMR(ins));

#if !FEATURE_FIXED_OUT_ARGS
            emitCurStackLvl += sizeof(int);
#endif
            break;
        }

        dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);

        if (ins == INS_call)
            goto IND_CALL;
        break;
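        // Illustrative example for the INS_pop adjustment above (made-up offset):
        // for "pop [ESP+8]" the processor increments ESP *before* computing the
        // effective address of the destination, so temporarily biasing
        // emitCurStackLvl by sizeof(int) keeps the emitted displacement consistent
        // with the post-pop ESP value.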
    case IF_SRD_CNS:
    case IF_SWR_CNS:
    case IF_SRW_CNS:
        emitGetInsCns(id, &cnsVal);
        dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_SRW_SHF:
        emitGetInsCns(id, &cnsVal);
        dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_RRD_SRD:
    case IF_RWR_SRD:
    case IF_RRW_SRD:
        code = insCodeRM(ins);

        // 4-byte AVX instructions are special cased inside emitOutputSV
        // since they do not have space to encode the ModRM byte.
        if (Is4ByteAVXInstruction(ins))
        {
            dst = emitOutputSV(dst, id, code);
        }
        else
        {
            code = AddVexPrefixIfNeeded(ins, code, size);

            // In case of AVX instructions that take 3 operands, encode reg1 as first source.
            // Note that reg1 is both a source and a destination.
            //
            // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
            // now we use the single source as source1 and source2.
            // For this format, moves do not support a third operand, so we only need to handle the binary ops.
            if (IsThreeOperandBinaryAVXInstruction(ins))
            {
                // encode source operand reg in 'vvvv' bits in 1's complement form
                code = insEncodeReg3456(ins, id->idReg1(), size, code);
            }

            regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
            dst = emitOutputSV(dst, id, code | regcode);
        }
        break;

    case IF_SRD_RRD:
    case IF_SWR_RRD:
    case IF_SRW_RRD:
        code = insCodeMR(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);

        // In case of AVX instructions that take 3 operands, encode reg1 as first source.
        // Note that reg1 is both a source and a destination.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        // For this format, moves do not support a third operand, so we only need to handle the binary ops.
        if (IsThreeOperandBinaryAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }

        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputSV(dst, id, code | regcode);
        break;

    /********************************************************************/
    /*                     Direct memory address                        */
    /********************************************************************/

    case IF_MRD:
    case IF_MRW:
    case IF_MWR:

#if FEATURE_STACK_FP_X87
    case IF_TRD_MRD:
    case IF_TWR_MRD:
    case IF_TRW_MRD:
    // case IF_MRD_TRD:
    case IF_MWR_TRD:
    // case IF_MRW_TRD:
#endif // FEATURE_STACK_FP_X87

        noway_assert(ins != INS_call);
        dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRD_OFF:
        dst = emitOutputCV(dst, id, insCodeMI(ins));
        break;

    case IF_RRD_MRD:
    case IF_RWR_MRD:
    case IF_RRW_MRD:
        code = insCodeRM(ins);
        // Special case 4-byte AVX instructions
        if (Is4ByteAVXInstruction(ins))
        {
            dst = emitOutputCV(dst, id, code);
        }
        else
        {
            code = AddVexPrefixIfNeeded(ins, code, size);

            // In case of AVX instructions that take 3 operands, encode reg1 as first source.
            // Note that reg1 is both a source and a destination.
            //
            // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
            // now we use the single source as source1 and source2.
            // For this format, moves do not support a third operand, so we only need to handle the binary ops.
            if (IsThreeOperandBinaryAVXInstruction(ins))
            {
                // encode source operand reg in 'vvvv' bits in 1's complement form
                code = insEncodeReg3456(ins, id->idReg1(), size, code);
            }

            regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
            dst = emitOutputCV(dst, id, code | regcode | 0x0500);
        }
        sz = emitSizeOfInsDsc(id);
        break;
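        // Note on the Is4ByteAVXInstruction special cases above (explanatory only):
        // these opcodes carry 0F 38 / 0F 3A escape bytes, so all four opcode bytes
        // are significant and 'code' has no spare byte in which to pre-merge the
        // ModRM register bits; emitOutputSV/emitOutputCV derive those bits themselves.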
    case IF_RWR_MRD_OFF:
        code = insCode(ins);
        code = AddVexPrefixIfNeeded(ins, code, size);

        // In case of AVX instructions that take 3 operands, encode reg1 as first source.
        // Note that reg1 is both a source and a destination.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        // For this format, moves do not support a third operand, so we only need to handle the binary ops.
        if (IsThreeOperandBinaryAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }

        regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
        dst = emitOutputCV(dst, id, code | 0x30 | regcode);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRD_RRD:
    case IF_MWR_RRD:
    case IF_MRW_RRD:
        code = insCodeMR(ins);
#ifdef FEATURE_AVX_SUPPORT
        code = AddVexPrefixIfNeeded(ins, code, size);

        // In case of AVX instructions that take 3 operands, encode reg1 as first source.
        // Note that reg1 is both a source and a destination.
        //
        // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
        // now we use the single source as source1 and source2.
        // For this format, moves do not support a third operand, so we only need to handle the binary ops.
        if (IsThreeOperandBinaryAVXInstruction(ins))
        {
            // encode source operand reg in 'vvvv' bits in 1's complement form
            code = insEncodeReg3456(ins, id->idReg1(), size, code);
        }
#endif // FEATURE_AVX_SUPPORT

        regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
        dst = emitOutputCV(dst, id, code | regcode | 0x0500);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRD_CNS:
    case IF_MWR_CNS:
    case IF_MRW_CNS:
        emitGetInsDcmCns(id, &cnsVal);
        dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

    case IF_MRW_SHF:
        emitGetInsDcmCns(id, &cnsVal);
        dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
        sz = emitSizeOfInsDsc(id);
        break;

#if FEATURE_STACK_FP_X87

    /********************************************************************/
    /*                  FP coprocessor stack operands                   */
    /********************************************************************/

    case IF_TRD_FRD:
    case IF_TWR_FRD:
    case IF_TRW_FRD:
        assert(id->idGCref() == GCT_NONE);
        dst += emitOutputWord(dst, insCodeMR(ins) | 0xC000 | (id->idReg1() << 8));
        break;

    case IF_FRD_TRD:
    case IF_FWR_TRD:
    case IF_FRW_TRD:
        assert(id->idGCref() == GCT_NONE);
        dst += emitOutputWord(dst, insCodeMR(ins) | 0xC004 | (id->idReg1() << 8));
        break;

#endif // FEATURE_STACK_FP_X87

    /********************************************************************/
    /*                              oops                                */
    /********************************************************************/

    default:

#ifdef DEBUG
        printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
        assert(!"don't know how to encode this instruction");
#endif
        break;
    }

    // Make sure we set the instruction descriptor size correctly
    assert(sz == emitSizeOfInsDsc(id));
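    // Illustrative example of the stack-level tracking below (x86, 4-byte slots,
    // made-up constant): "sub esp, 8" is recorded as pushing 8 / sizeof(void*) = 2
    // slots and "add esp, 8" as popping 2 slots, so emitCurStackLvl stays in sync
    // with the explicit push/pop instructions.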
#if !FEATURE_FIXED_OUT_ARGS

    // Make sure we keep the current stack level up to date
    if (!emitIGisInProlog(ig) && !emitIGisInEpilog(ig))
    {
        switch (ins)
        {
        case INS_push:
            // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
            // finally block for calling it locally for an op_leave.
            emitStackPush(dst, id->idGCref());
            break;

        case INS_pop:
            emitStackPop(dst, false, /*callInstrSize*/0, 1);
            break;

        case INS_sub:
            // Check for "sub ESP, icon"
            if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
            {
                assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
                emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
            }
            break;

        case INS_add:
            // Check for "add ESP, icon"
            if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
            {
                assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
                emitStackPop(dst, /*isCall*/false, /*callInstrSize*/0, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
            }
            break;

        default:
            break;
        }
    }

#endif // !FEATURE_FIXED_OUT_ARGS

    assert((int)emitCurStackLvl >= 0);

    // Only epilog "instructions" and some pseudo-instrs
    // are allowed not to generate any code
    assert(*dp != dst || emitInstHasNoCode(ins));

#ifdef DEBUG
    if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
    {
        emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
    }

    if (emitComp->compDebugBreak)
    {
        // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
        // at the beginning of this method.
        static ConfigDWORD fJitEmitPrintRefRegs;
        if (fJitEmitPrintRefRegs.val(CLRConfig::INTERNAL_JitEmitPrintRefRegs) != 0)
        {
            printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n",
                   id->idDebugOnlyInfo()->idNum);
            printf("  emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
            printRegMaskInt(emitThisGCrefRegs);
            emitDispRegSet(emitThisGCrefRegs);
            printf("\n");
            printf("  emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
            printRegMaskInt(emitThisByrefRegs);
            emitDispRegSet(emitThisByrefRegs);
            printf("\n");
        }

        // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
        // emitting instruction a6 (i.e. IN00a6 in jitdump).
        static ConfigDWORD fJitBreakEmitOutputInstr;
        if ((unsigned)fJitBreakEmitOutputInstr.val(CLRConfig::INTERNAL_JitBreakEmitOutputInstr) == id->idDebugOnlyInfo()->idNum)
        {
            assert(!"JitBreakEmitOutputInstr reached");
        }
    }
#endif

#ifdef TRANSLATE_PDB
    if (*dp != dst)
    {
        // only map instruction groups to instruction groups
        MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
    }
#endif

    *dp = dst;

#ifdef DEBUG
    if (ins == INS_mulEAX || ins == INS_imulEAX)
    {
        // INS_mulEAX has an implicit target of Edx:Eax. Make sure
        // that we detected this and cleared its GC-status.
        assert(((RBM_EAX|RBM_EDX) & (emitThisGCrefRegs|emitThisByrefRegs)) == 0);
    }

    if (instrIs3opImul(ins))
    {
        // The target of the 3-operand imul is implicitly encoded. Make sure
        // that we detected the implicit register and cleared its GC-status.
        regMaskTP regMask = genRegMask(inst3opImulReg(ins));
        assert((regMask & (emitThisGCrefRegs|emitThisByrefRegs)) == 0);
    }
#endif

    return sz;
}

#ifdef _PREFAST_
#pragma warning(pop)
#endif

/*****************************************************************************/
/*****************************************************************************/

#endif // defined(_TARGET_XARCH_)