Diffstat (limited to 'src/jit/codegenarm64.cpp')
-rw-r--r--  src/jit/codegenarm64.cpp | 8687
1 file changed, 8687 insertions(+), 0 deletions(-)
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
new file mode 100644
index 0000000000..71b238308d
--- /dev/null
+++ b/src/jit/codegenarm64.cpp
@@ -0,0 +1,8687 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Arm64 Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+#include "emit.h"
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+//------------------------------------------------------------------------
+// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
+// or the epilog. The unwind codes for the generated instructions are produced. An available temporary
+// register must be specified, in case the constant is too large to encode in an "add" instruction
+// (or "sub" instruction, if we choose to use one), in which case we must first load the constant
+// into a register before using it.
+//
+// Arguments:
+// spDelta - the value to add to SP (can be negative)
+// tmpReg - an available temporary register
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
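+//
+// Notes:
+// As a sketch (not the emitted encodings verbatim), the two possible sequences are:
+// add sp, sp, #spDelta ; when spDelta is encodable as an "add" immediate
+// or, for a large spDelta:
+// mov tmpReg, #abs(spDelta)
+// add/sub sp, sp, tmpReg ; "sub" when spDelta is negative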
+
+void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ unsigned unwindSpDelta;
+
+ if (emitter::emitIns_valid_imm_for_add(spDelta, EA_8BYTE))
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta);
+
+ unwindSpDelta = (unsigned)abs(spDelta);
+ }
+ else
+ {
+ bool adjustmentIsNegative = (spDelta < 0);
+ spDelta = abs(spDelta);
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, spDelta);
+ if (pTmpRegIsZero != nullptr)
+ {
+ *pTmpRegIsZero = false;
+ }
+ compiler->unwindPadding();
+
+ getEmitter()->emitIns_R_R_R(adjustmentIsNegative ? INS_sub : INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, tmpReg);
+
+ unwindSpDelta = (unsigned)spDelta;
+ }
+
+ // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive value.
+ compiler->unwindAllocStack(unwindSpDelta);
+}
+
+//------------------------------------------------------------------------
+// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet prolog.
+// If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that instruction.
+//
+// Arguments:
+// reg1 - First register of pair to save.
+// reg2 - Second register of pair to save.
+// spOffset - The offset from SP to store reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This allows us to
+// emit the "save_next" unwind code.
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
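+//
+// Notes:
+// As a sketch, the two sequences this may emit are:
+// stp reg1, reg2, [sp,#spDelta]! ; fold the SP adjustment into the store (pre-indexed)
+// or, when the adjustment cannot be folded:
+// sub sp, sp, #abs(spDelta) ; possibly via tmpReg, for large frames
+// stp reg1, reg2, [sp,#spOffset]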
+
+void CodeGen::genPrologSaveRegPair(regNumber reg1,
+ regNumber reg2,
+ int spOffset,
+ int spDelta,
+ bool lastSavedWasPreviousPair,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+ assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both FP/SIMD
+
+ bool needToSaveRegs = true;
+ if (spDelta != 0)
+ {
+ if ((spOffset == 0) && (spDelta >= -512))
+ {
+ // We can use pre-indexed addressing.
+ // stp REG, REG + 1, [SP, #spDelta]!
+ // 64-bit STP offset range: -512 to 504, multiple of 8.
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta);
+
+ needToSaveRegs = false;
+ }
+ else
+ {
+ // We need to do the SP adjustment separately from the store; we can't combine pre-indexed addressing with a non-zero offset.
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+ }
+
+ if (needToSaveRegs)
+ {
+ // stp REG, REG + 1, [SP, #offset]
+ // 64-bit STP offset range: -512 to 504, multiple of 8.
+ assert(spOffset <= 504);
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+
+ if (lastSavedWasPreviousPair)
+ {
+ // This works as long as we've only been saving pairs, in order, and we've saved the previous one just before this one.
+ compiler->unwindSaveNext();
+ }
+ else
+ {
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or floating-point/SIMD
+// register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0), then spOffset must be 8.
+// This is because otherwise we would create an alignment hole above the saved register, not below it, which we currently don't
+// support. This restriction could be loosened if the callers change to handle it (and this function changes to support using
+// pre-indexed STR addressing). The caller must ensure that we can use the STR instruction, and that spOffset will be in the
+// legal range for that instruction.
+//
+// Arguments:
+// reg1 - Register to save.
+// spOffset - The offset from SP to store reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
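+//
+// Notes:
+// As a sketch, when spDelta is non-zero this emits:
+// sub sp, sp, #abs(spDelta) ; possibly via tmpReg, for large frames
+// str reg1, [sp,#8] ; spOffset must be 8 in this case (see above)
+// and otherwise simply:
+// str reg1, [sp,#spOffset]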
+
+void CodeGen::genPrologSaveReg(regNumber reg1,
+ int spOffset,
+ int spDelta,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ if (spDelta != 0)
+ {
+ // If saving a single callee-save register, and we need to change SP, the offset cannot be zero. It must be 8 to account
+ // for alignment.
+ assert(spOffset != 0);
+ assert(spOffset == REGSIZE_BYTES);
+
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+
+ // str REG, [SP, #offset]
+ // 64-bit STR offset range: 0 to 32760, multiple of 8.
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+ compiler->unwindSaveReg(reg1, spOffset);
+}
+
+//------------------------------------------------------------------------
+// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
+// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
+// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that instruction.
+//
+// Arguments:
+// reg1 - First register of pair to restore.
+// reg2 - Second register of pair to restore.
+// spOffset - The offset from SP to load reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
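+//
+// Notes:
+// As a sketch, the two sequences this may emit are:
+// ldp reg1, reg2, [sp], #spDelta ; fold the SP adjustment into the load (post-indexed)
+// or, when the adjustment cannot be folded:
+// ldp reg1, reg2, [sp,#spOffset]
+// add sp, sp, #spDelta ; possibly via tmpReg, for large frames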
+
+void CodeGen::genEpilogRestoreRegPair(regNumber reg1,
+ regNumber reg2,
+ int spOffset,
+ int spDelta,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ if (spDelta != 0)
+ {
+ if ((spOffset == 0) && (spDelta <= 504))
+ {
+ // Fold the SP change into this instruction.
+ // ldp reg1, reg2, [SP], #spDelta
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta);
+ }
+ else
+ {
+ // Can't fold in the SP change; need to use a separate ADD instruction.
+
+ // ldp reg1, reg2, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+ }
+ else
+ {
+ // ldp reg1, reg2, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+ }
+}
+
+//------------------------------------------------------------------------
+// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
+//
+// Arguments:
+// reg1 - Register to restore.
+// spOffset - The offset from SP to restore reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
+
+void CodeGen::genEpilogRestoreReg(regNumber reg1,
+ int spOffset,
+ int spDelta,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ // ldr reg1, [SP, #offset]
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+ compiler->unwindSaveReg(reg1, spOffset);
+
+ if (spDelta != 0)
+ {
+ assert(spOffset != 0);
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. The save set does not contain FP, since that is
+// guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
+// that are allowed by the unwind codes. Integer registers are stored at lower addresses,
+// FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
+// use. Note that the save set can contain LR if this is a frame without a frame pointer, in
+// which case LR is saved along with the other callee-saved registers. The caller can tell us
+// to fold in a stack pointer adjustment, which we will do with the first instruction. Note that
+// the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address than) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8, and the stack pointer
+// adjustment must be done by a SUB, and not folded into a pre-indexed store.
+//
+// Arguments:
+// regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing.
+// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+// if non-zero spDelta, then this is the offset of the first save *after* that
+// SP adjustment.
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or zero).
+//
+// Return Value:
+// None.
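+//
+// Notes:
+// As an illustration (the exact registers and offsets depend on the save set and frame layout),
+// a save sequence that is the mirror image of the restore example in
+// genRestoreCalleeSavedRegistersHelp() would be:
+// stp x19, x20, [sp,#-80]!
+// stp x21, x22, [sp,#16]
+// stp x23, x24, [sp,#32]
+// stp x25, x26, [sp,#48]
+// stp x27, x28, [sp,#64]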
+
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
+ int lowestCalleeSavedOffset,
+ int spDelta)
+{
+ unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+ if (regsToSaveCount == 0)
+ {
+ return;
+ }
+
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0);
+ assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
+ assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
+
+ regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
+
+ int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
+
+ if (maskSaveRegsInt != RBM_NONE)
+ {
+ // Save the integer registers
+
+ unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt);
+ bool lastSavedWasPair = false;
+
+ while (maskSaveRegsInt != RBM_NONE)
+ {
+ regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskSaveRegsInt &= ~reg1Mask;
+
+ if (intRegsToSaveCount >= 2)
+ {
+ // We can use a STP instruction.
+
+ regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR));
+ maskSaveRegsInt &= ~reg2Mask;
+
+ genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
+
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // the epilog, where the unwind codes must match. Turn this off until that is better understood.
+ // lastSavedWasPair = true;
+
+ intRegsToSaveCount -= 2;
+ spOffset += 2 * REGSIZE_BYTES;
+ }
+ else
+ {
+ // No register pair; we use a STR instruction.
+
+ assert(intRegsToSaveCount == 1); // this will be the last store we do
+
+ genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
+
+ lastSavedWasPair = false;
+
+ intRegsToSaveCount -= 1;
+ spOffset += REGSIZE_BYTES;
+ }
+
+ spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+ }
+
+ assert(intRegsToSaveCount == 0);
+ }
+
+ if (maskSaveRegsFloat != RBM_NONE)
+ {
+ // Save the floating-point/SIMD registers
+
+ unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat);
+ bool lastSavedWasPair = false;
+
+ while (maskSaveRegsFloat != RBM_NONE)
+ {
+ regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskSaveRegsFloat &= ~reg1Mask;
+
+ if (floatRegsToSaveCount >= 2)
+ {
+ // We can use a STP instruction.
+
+ regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ assert(reg2 == REG_NEXT(reg1));
+ maskSaveRegsFloat &= ~reg2Mask;
+
+ genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
+
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // the epilog, where the unwind codes must match. Turn this off until that is better understood.
+ // lastSavedWasPair = true;
+
+ floatRegsToSaveCount -= 2;
+ spOffset += 2 * FPSAVE_REGSIZE_BYTES;
+ }
+ else
+ {
+ // No register pair; we use a STR instruction.
+
+ assert(floatRegsToSaveCount == 1);
+
+ genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
+
+ lastSavedWasPair = false;
+
+ floatRegsToSaveCount -= 1;
+ spOffset += FPSAVE_REGSIZE_BYTES;
+ }
+
+ spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+ }
+
+ assert(floatRegsToSaveCount == 0);
+ }
+}
+
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
+// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
+//
+// Arguments:
+// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing.
+// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or zero).
+//
+// Here's an example restore sequence:
+// ldp x27, x28, [sp,#96]
+// ldp x25, x26, [sp,#80]
+// ldp x23, x24, [sp,#64]
+// ldp x21, x22, [sp,#48]
+// ldp x19, x20, [sp,#32]
+//
+// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and
+// the last restore adjusts SP by the specified amount. For example:
+// ldp x27, x28, [sp,#64]
+// ldp x25, x26, [sp,#48]
+// ldp x23, x24, [sp,#32]
+// ldp x21, x22, [sp,#16]
+// ldp x19, x20, [sp], #80
+//
+// Note that we call the unwind functions specifying the prolog operation that is being un-done. So, for example, when
+// generating a post-indexed load, we call the unwind function specifying the corresponding pre-indexed store.
+//
+// Return Value:
+// None.
+
+void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask,
+ int lowestCalleeSavedOffset,
+ int spDelta)
+{
+ unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
+ if (regsToRestoreCount == 0)
+ {
+ return;
+ }
+
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0);
+ assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here
+ assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
+
+ regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
+
+ assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+ int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Start by pointing one past the end; we pre-decrement to find the offset to load from.
+
+ // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
+ // callee-saved registers properly.
+
+ if (maskRestoreRegsFloat != RBM_NONE)
+ {
+ // Restore the floating-point/SIMD registers
+
+ unsigned floatRegsToRestoreCount = genCountBits(maskRestoreRegsFloat);
+
+ while (maskRestoreRegsFloat != RBM_NONE)
+ {
+ if ((floatRegsToRestoreCount % 2) == 0)
+ {
+ assert(floatRegsToRestoreCount >= 2);
+
+ regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ maskRestoreRegsFloat &= ~reg2Mask;
+
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsFloat &= ~reg1Mask;
+
+ spOffset -= 2 * FPSAVE_REGSIZE_BYTES;
+
+ // Is this the last restore instruction? And have we been told to adjust SP?
+ bool thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount == 2) && (maskRestoreRegsInt == RBM_NONE);
+ genEpilogRestoreRegPair(reg1, reg2, spOffset, thisIsTheLastRestoreInstruction ? spDelta : 0, REG_IP0, nullptr);
+
+ floatRegsToRestoreCount -= 2;
+ }
+ else
+ {
+ // We do the odd register first when restoring, last when saving.
+ assert((floatRegsToRestoreCount % 2) == 1);
+
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsFloat &= ~reg1Mask;
+
+ spOffset -= FPSAVE_REGSIZE_BYTES;
+
+ // Is this the last restore instruction? And have we been told to adjust SP?
+ bool thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount == 1) && (maskRestoreRegsInt == RBM_NONE);
+ genEpilogRestoreReg(reg1, spOffset, thisIsTheLastRestoreInstruction ? spDelta : 0, REG_IP0, nullptr);
+
+ floatRegsToRestoreCount -= 1;
+ }
+ }
+
+ assert(floatRegsToRestoreCount == 0);
+ }
+
+ if (maskRestoreRegsInt != RBM_NONE)
+ {
+ // Restore the integer registers
+
+ unsigned intRegsToRestoreCount = genCountBits(maskRestoreRegsInt);
+
+ while (maskRestoreRegsInt != RBM_NONE)
+ {
+ if ((intRegsToRestoreCount % 2) == 0)
+ {
+ assert(intRegsToRestoreCount >= 2);
+
+ regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ maskRestoreRegsInt &= ~reg2Mask;
+
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsInt &= ~reg1Mask;
+
+ spOffset -= 2 * REGSIZE_BYTES;
+
+ // Is this the last restore instruction? And have we been told to adjust SP?
+ bool thisIsTheLastRestoreInstruction = (intRegsToRestoreCount == 2);
+ genEpilogRestoreRegPair(reg1, reg2, spOffset, thisIsTheLastRestoreInstruction ? spDelta : 0, REG_IP0, nullptr);
+
+ intRegsToRestoreCount -= 2;
+ }
+ else
+ {
+ // We do the odd register first when restoring, last when saving.
+ assert((intRegsToRestoreCount % 2) == 1);
+
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsInt &= ~reg1Mask;
+
+ spOffset -= REGSIZE_BYTES;
+
+ // Is this the last restore instruction? And have we been told to adjust SP?
+ bool thisIsTheLastRestoreInstruction = (intRegsToRestoreCount == 1);
+ genEpilogRestoreReg(reg1, spOffset, thisIsTheLastRestoreInstruction ? spDelta : 0, REG_IP0, nullptr);
+
+ intRegsToRestoreCount -= 1;
+ }
+ }
+
+ assert(intRegsToRestoreCount == 0);
+ }
+}
+
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch: x0 = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
+ * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
+ *
+ * Frame type 1:
+ * For #outsz == 0 and #framesz <= 512:
+ * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 2:
+ * For #outsz != 0 and #framesz <= 512:
+ * sub sp,sp,#framesz ; establish the frame
+ * stp fp,lr,[sp,#outsz] ; save FP/LR.
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 3:
+ * For #framesz > 512:
+ * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ * sub sp,sp,#outsz ; create space for outgoing argument space
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
+ * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
+ * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
+ * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
+ * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
+ * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
+ * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
+ * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
+ * outgoing argument space. Both changes to SP might need to add alignment padding.
+ *
+ * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
+ * as in the main function.
+ *
+ * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ *
+ * if (this is a filter funclet)
+ * {
+ * // x1 on entry to a filter funclet is CallerSP of the containing function:
+ * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
+ * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
+ * // a funclet. Consider:
+ * //
+ * // try {
+ * // try {
+ * // throw new Exception();
+ * // } catch(Exception) {
+ * // throw new Exception(); // The exception thrown here ...
+ * // }
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
+ * // } filter-handler {
+ * // }
+ * //
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
+ * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
+ * // create a main function PSP for any function with a filter.
+ *
+ * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP
+ * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer
+ * }
+ * else
+ * {
+ * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
+ * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction.
+ *
+ * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch.
+ * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP
+ * }
+ *
+ * An example epilog sequence is then:
+ *
+ * add sp,sp,#outsz ; if any outgoing argument space
+ * ... ; restore callee-saved registers
+ * ldp x19,x20,[sp,#xxx]
+ * ldp fp,lr,[sp],#framesz
+ * ret lr
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletProlog()\n");
+#endif
+
+ assert(block != NULL);
+ assert(block->bbFlags & BBF_FUNCLET_BEG);
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
+
+ // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+ assert((maskSaveRegsInt & RBM_FP) != 0);
+
+ bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
+
+ regMaskTP maskArgRegsLiveIn;
+ if (isFilter)
+ {
+ maskArgRegsLiveIn = RBM_R0 | RBM_R1;
+ }
+ else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_NONE;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_R0;
+ }
+
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
+
+ if (genFuncletInfo.fiFrameType == 1)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
+ }
+ else if (genFuncletInfo.fiFrameType == 2)
+ {
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta1);
+ compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 3);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1, INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
+ }
+ maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
+
+ genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
+
+ if (genFuncletInfo.fiFrameType == 3)
+ {
+ assert(genFuncletInfo.fiSpDelta2 != 0);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta2);
+ compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta2);
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ if (isFilter)
+ {
+ // This is the first block of a filter
+
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
+ regTracker.rsTrackRegTrash(REG_R1);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta);
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
+ }
+ else
+ {
+ // This is a non-filter funclet
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
+ regTracker.rsTrackRegTrash(REG_R3);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta);
+ }
+}
+
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ bool unwindStarted = false;
+
+ if (!unwindStarted)
+ {
+ // We can delay this until we know we'll generate an unwindable instruction, if necessary.
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
+
+ // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
+ assert((maskRestoreRegsInt & RBM_LR) != 0);
+ assert((maskRestoreRegsInt & RBM_FP) != 0);
+
+ maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
+
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
+
+ if (genFuncletInfo.fiFrameType == 3)
+ {
+ assert(genFuncletInfo.fiSpDelta2 != 0);
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta2);
+ compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta2);
+
+ lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
+ }
+
+ regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
+
+ if (genFuncletInfo.fiFrameType == 1)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1, INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
+ }
+ else if (genFuncletInfo.fiFrameType == 2)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSP_to_FPLR_save_delta);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -genFuncletInfo.fiSpDelta1);
+ compiler->unwindAllocStack(-genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 3);
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1, INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+ }
+
+ inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
+ compiler->unwindReturn(REG_LR);
+
+ compiler->unwindEndEpilog();
+}
+
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ *
+ * See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ return;
+
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+
+ genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
+
+ regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+ assert((rsMaskSaveRegs & RBM_LR) != 0);
+ assert((rsMaskSaveRegs & RBM_FP) != 0);
+
+ unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + /* PSPSym */ REGSIZE_BYTES;
+ unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
+
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+ unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
+
+ unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
+ assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
+
+ int SP_to_FPLR_save_delta;
+ int SP_to_PSP_slot_delta;
+ int CallerSP_to_PSP_slot_delta;
+
+ if (maxFuncletFrameSizeAligned <= 512)
+ {
+ unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
+ unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN);
+ assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
+
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
+
+ SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
+
+ if (compiler->lvaOutgoingArgSpaceSize == 0)
+ {
+ genFuncletInfo.fiFrameType = 1;
+ }
+ else
+ {
+ genFuncletInfo.fiFrameType = 2;
+ }
+ genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
+ genFuncletInfo.fiSpDelta2 = 0;
+
+ assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
+ }
+ else
+ {
+ unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
+ assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
+
+ SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
+
+ genFuncletInfo.fiFrameType = 3;
+ genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
+ genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
+
+ assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
+ }
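+
+ // As a worked example (hypothetical numbers, for illustration only): suppose rsMaskSaveRegs is
+ // {FP, LR, x19, x20} (saveRegsCount == 4) and lvaOutgoingArgSpaceSize == 0. Then saveRegsPlusPSPSize
+ // is 4*8 + 8 == 40, which rounds up to 48, so this is frame type 1 with fiSpDelta1 == -48. The
+ // alignment pad is 8, giving SP_to_FPLR_save_delta == 0, SP_to_PSP_slot_delta == 0 + 16 + 8 == 24,
+ // and CallerSP_to_PSP_slot_delta == -(40 - 16) == -24.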
+
+ /* Now save it for future use */
+
+ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
+ genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
+ genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES;
+ genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n");
+ printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
+ printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
+ printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
+ printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
+ printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
+ printf(" Frame type: %d\n", genFuncletInfo.fiFrameType);
+ printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
+ printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
+
+ if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ {
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
+ }
+#endif // DEBUG
+
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
+ assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
+ assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and funclet!
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX End Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc * varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ lclTyp = genActualType(lclTyp);
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc * varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg,
+ ssize_t val,
+ var_types type,
+ insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+}
+
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. On ARM64 we always use REG_TMP_0 and REG_TMP_1 as temp registers
+ * and this works fine in the case of tail calls.
+ * Implementation Note: pushReg = true, in case of tail calls.
+ */
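+// As a sketch, the check emitted below is (cookie-value case; the label is illustrative):
+// mov regGSConst, #gsGlobalSecurityCookieVal
+// ldr regGSValue, [<frame>, #lvaGSSecurityCookie]
+// cmp regGSConst, regGSValue
+// beq L_ok
+// bl CORINFO_HELP_FAIL_FAST
+// L_ok: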
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ regNumber regGSConst = REG_TMP_0;
+ regNumber regGSValue = REG_TMP_1;
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // load the GS cookie constant into a reg
+ //
+ genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ }
+ else
+ {
+ // Ngen case - GS cookie constant needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
+ }
+ // Load this method's GS value from the stack frame
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
+ // Compare with the GS cookie constant
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
+
+ BasicBlock *gsCheckBlk = genCreateTempLabel();
+ inst_JMP(genJumpKindForOper(GT_EQ, true), gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc * varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+ unsigned stmtNum = 0;
+ UINT64 totalCostEx = 0;
+ UINT64 totalCostSz = 0;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch || compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+ /* Initialize the line# tracking logic */
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-ARM64-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable;
+ varNum < compiler->lvaCount;
+ varNum++ , varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock * block;
+ BasicBlock * lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB;
+ block != NULL;
+ lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regSet.rsMaskVars = RBM_NONE;
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't change?
+ // We cleared them out above. Maybe we should just not clear them out, but update the ones that change here.
+ // That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tLive regs: ");
+ if (regSet.rsMaskVars == newLiveRegSet)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf(" => ");
+ }
+ printRegMaskInt(newLiveRegSet);
+ compiler->getEmitter()->emitDispRegSet(newLiveRegSet);
+ printf("\n");
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ regSet.rsMaskVars = newLiveRegSet;
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUG_ARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUG_ARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+#if JIT_FEATURE_SSA_SKIP_DEFS
+ GenTreePtr firstStmt = block->FirstNonPhiDef();
+#else
+ GenTreePtr firstStmt = block->bbTreeList;
+#endif
+ if (firstStmt != NULL)
+ {
+ GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
+ if (compiler->gtHasCatchArg(firstTree))
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET|BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo &&
+ (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to emit a NO_MAPPING entry immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX) ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ if (stmt->AsStmt()->gtStmtIsEmbedded())
+ continue;
+
+ /* Get hold of the statement tree */
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#if defined(DEBUGGING_SUPPORT)
+
+ /* Do we have a new IL-offset ? */
+
+ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ /* Create and append a new IP-mapping entry */
+                genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
+ firstMapping = false;
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ stmt->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset +=
+ dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+
+ stmtNum++;
+ if (compiler->verbose)
+ {
+ printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
+ printf("Holding variables: ");
+ dspRegMask(regSet.rsMaskVars); printf("\n\n");
+ if (compiler->verboseTrees)
+ {
+ compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
+ printf("\n");
+ }
+ }
+ totalCostEx += ((UINT64)stmt->gtCostEx * block->getBBWeight(compiler));
+ totalCostSz += (UINT64) stmt->gtCostSz;
+#endif // DEBUG
+
+ // Traverse the tree in linear order, generating code for each node in the
+ // tree as we encounter it
+
+ compiler->compCurLifeTree = NULL;
+ compiler->compCurStmt = stmt;
+ for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList;
+ treeNode != NULL;
+ treeNode = treeNode->gtNext)
+ {
+ genCodeForTreeNode(treeNode);
+ if (treeNode->gtHasReg() && treeNode->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(treeNode);
+ }
+ }
+
+ regSet.rsSpillChk();
+
+#ifdef DEBUG
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+        // might be used as a return value of an int function (which is legal)
+ if (tree->gtOper == GT_RETURN &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first statement in a catch block will be the
+ // harmless "inc" instruction (does not interfere with the exception
+ // object).
+
+ if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) &&
+ (stmt == block->bbTreeList) &&
+ handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after tree=");
+ compiler->printTreeID(tree);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+
+        for (GenTree* node = stmt->gtStmt.gtStmtList; node; node = node->gtNext)
+ {
+ assert(!(node->gtFlags & GTF_SPILL));
+ }
+
+#endif // DEBUG
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#ifdef DEBUGGING_SUPPORT
+ genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
+#endif
+
+ } //-------- END-FOR each statement-tree of the current block ---------
+
+#if defined(DEBUG) && defined(_TARGET_ARM64_)
+ if (block->bbNext == nullptr)
+ {
+ // Unit testing of the ARM64 emitter: generate a bunch of instructions into the last block
+ // (it's as good as any, but better than the prolog, which can only be a single instruction
+ // group) then use COMPLUS_JitLateDisasm=* to see if the late disassembler
+ // thinks the instructions are the same as we do.
+ genArm64EmitterUnitTests();
+ }
+#endif // defined(DEBUG) && defined(_TARGET_ARM64_)
+
+#ifdef DEBUGGING_SUPPORT
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ //noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc * varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+
+#if 0
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.)
+ // We handle case #1 here, and case #2 in the emitter.
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+                    // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // 0
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+            //      2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == NULL)
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+ || !BasicBlock::sameEHRegion(block, block->bbNext)
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext))
+ || block->bbNext == compiler->fgFirstColdBlock
+ )
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Generate a call to the finally, like this:
+ // mov x0,qword ptr [fp + 10H] // Load x0 with PSPSym
+ // bl finally-funclet
+ // b finally-return // Only for non-retless finally calls
+ // The 'b' can be a NOP if we're going to the next block.
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
+ getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
+
+ if (block->bbFlags & BBF_RETLESS_CALL)
+ {
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
+
+ if ((block->bbNext == nullptr) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+                // after the call is not (cannot be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitMakeRemainderNonInterruptible();
+
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
+ {
+ // Fall-through.
+ // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
+ }
+ }
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if ( !(block->bbFlags & BBF_RETLESS_CALL) )
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+ case BBJ_EHCATCHRET:
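+                // Set the return register to the address at which the catch funclet
+                // should resume execution; illustratively:
+                //   adr x0, <bbJumpDest>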
+ getEmitter()->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("totalCostEx = %6d, totalCostSz = %5d ",
+ totalCostEx, totalCostSz);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// return the child that has the same reg as the dst (if any)
+// other child returned (out param) in 'other'
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTree *
+sameRegAsDst(GenTree *tree, GenTree *&other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+        return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+        return nullptr;
+ }
+}
+
+// move an immediate value into an integer register
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size,
+ regNumber reg,
+ ssize_t imm,
+ insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if (EA_IS_RELOC(size))
+ {
+ NYI("Reloc constant");
+ }
+ else if (imm == 0)
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+ if (emitter::emitIns_valid_imm_for_mov(imm, size))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ else
+ {
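+            // Build the constant 16 bits at a time: 'mov' (movz) writes the low
+            // halfword and zeroes the rest, then 'movk' patches in the upper
+            // halfwords that are not already zero. Illustratively, loading
+            // 0x1234567855667788 emits:
+            //   mov  reg, #0x7788
+            //   movk reg, #0x5566, LSL #16
+            //   movk reg, #0x5678, LSL #32
+            //   movk reg, #0x1234, LSL #48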
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff));
+ getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL);
+
+            if ((size == EA_8BYTE) && ((imm >> 32) != 0))  // Sometimes the upper 32 bits are zero and the first mov has zeroed them
+ {
+ getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL);
+                if ((imm >> 48) != 0)  // Frequently the upper 16 bits are zero and the first mov has zeroed them
+ {
+ getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL);
+ }
+ }
+ }
+        // The caller may have requested that the flags be set on this mov (rarely/never)
+        if (flags == INS_FLAGS_SET)
+        {
+            getEmitter()->emitIns_R_I(INS_cmp, size, reg, 0); // sets N/Z based on the value just loaded
+        }
+ }
+
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/***********************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+            // Relocatable values tend to come down as a CNS_INT of native int type,
+            // so the line between a plain constant and a relocatable handle is kind of blurry here
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ emitter *emit = getEmitter();
+ emitAttr size = emitTypeSize(tree);
+ GenTreeDblCon *dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
+ // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0)
+ if (*(__int64*)&constValue == 0)
+ {
+ // A faster/smaller way to generate 0.0
+ // We will just zero out the entire vector register for both float and double
+ emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
+ }
+ else if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // We can load the FP constant using the fmov FP-immediate for this constValue
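+                // Illustratively: 'fmov d0, #1.00'. Only a small set of values
+                // (sign, 3-bit exponent, 4-bit fraction) are encodable this way.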
+ emit->emitIns_R_F(INS_fmov, size, targetReg, constValue);
+ }
+ else
+ {
+ // We must load the FP constant from the constant pool
+ // Emit a data section constant for the float or double constant.
+ CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst);
+ emit->emitIns_R_C(INS_ldr, size, targetReg, hnd, 0);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+
+// Generate code to get the high N bits of an N*N=2N bit multiplication result
+void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
+{
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ assert(!treeNode->gtOverflowEx());
+
+#if 0
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter *emit = getEmitter();
+ emitAttr size = emitTypeSize(treeNode);
+ GenTree *op1 = treeNode->gtOp.gtOp1;
+ GenTree *op2 = treeNode->gtOp.gtOp2;
+
+ // to get the high bits of the multiply, we are constrained to using the
+ // 1-op form: RDX:RAX = RAX * rm
+ // The 3-op form (Rx=Ry*Rz) does not support it.
+
+ genConsumeOperands(treeNode->AsOp());
+
+ GenTree* regOp = op1;
+ GenTree* rmOp = op2;
+
+ // Set rmOp to the contained memory operand (if any)
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+ // Setup targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+#else // !0
+ NYI("genCodeForMulHi");
+#endif // !0
+}
+
+// generate code for a DIV or MOD operation
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
+{
+ // unused on ARM64
+}
+
+// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter *emit = getEmitter();
+
+ assert (oper == GT_ADD ||
+ oper == GT_SUB ||
+ oper == GT_MUL ||
+ oper == GT_DIV ||
+ oper == GT_UDIV ||
+ oper == GT_AND ||
+ oper == GT_OR ||
+ oper == GT_XOR);
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ genConsumeOperands(treeNode->AsOp());
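+    // emitInsTernary selects the register or immediate encoding from the operands;
+    // illustratively, 'add w0, w1, w2' or 'add w0, w1, #12'.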
+
+ regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2);
+ noway_assert(r == targetReg);
+
+ genProduceReg(treeNode);
+}
+
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void
+CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter *emit = getEmitter();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ printf("Generating: ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_START_NONGC:
+ getEmitter()->emitMakeRemainderNonInterruptible();
+ break;
+
+ case GT_PROF_HOOK:
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+#ifdef PROFILING_SUPPORTED
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NOT:
+ assert(!varTypeIsFloating(targetType));
+
+ __fallthrough;
+
+ case GT_NEG:
+ {
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!treeNode->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+ genCodeForBinary(treeNode);
+ }
+ else // integer divide operation
+ {
+ GenTreePtr divisorOp = treeNode->gtGetOp2();
+
+ // TODO-ARM64-CQ: Optimize a divide by power of 2 as we do for AMD64
+
+ if (divisorOp->IsZero())
+ {
+ genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_DIV_BY_ZERO);
+ // We don't need to generate the sdiv/udiv instruction
+ }
+ else
+ {
+ emitAttr cmpSize = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet())));
+ regNumber divisorReg = divisorOp->gtRegNum;
+
+ if (treeNode->gtOper == GT_DIV)
+ {
+ BasicBlock* sdivLabel = genCreateTempLabel();
+
+ // Two possible exceptions:
+ // (AnyVal / 0) => DivideByZeroException
+ // (MinInt / -1) => ArithmeticException
+ //
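+                        // The fully general check sequence is, illustratively:
+                        //      cmp   divisorReg, #0
+                        //      b.eq  throwDivByZero
+                        //      cmp   divisorReg, #-1
+                        //      b.ne  sdivLabel
+                        //      adds  zr, dividendReg, dividendReg
+                        //      b.ne  sdivLabel
+                        //      b.vs  throwArithExcpn
+                        //   sdivLabel:
+                        //      sdiv  ...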
+ bool checkDividend = true;
+ // Do we have a contained immediate for the 'divisorOp'?
+ if (divisorOp->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = divisorOp->AsIntConCommon();
+ assert(intConst->IconValue() != 0); // already checked above by IsZero()
+ if (intConst->IconValue() != -1)
+ {
+ checkDividend = false; // We statically know that the dividend is not -1
+ }
+ }
+ else
+ {
+                            // If the divisor is zero, throw a DivideByZeroException
+ emit->emitIns_R_I(INS_cmp, cmpSize, divisorReg, 0);
+ genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_DIV_BY_ZERO);
+
+                            // If the divisor is not -1, branch to 'sdivLabel'
+ emit->emitIns_R_I(INS_cmp, cmpSize, divisorReg, -1);
+ inst_JMP(genJumpKindForOper(GT_NE, true), sdivLabel);
+ // If control flow continues past here the 'divisorReg' is known to be -1
+ }
+
+ if (checkDividend)
+ {
+ regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum;
+ // At this point the divisor is known to be -1
+ //
+ // Issue 'adds zr, dividendReg, dividendReg' instruction
+ // this will set the Z and V flags only when dividendReg is MinInt
+ //
+ emit->emitIns_R_R_R(INS_adds, cmpSize, REG_ZR, dividendReg, dividendReg);
+ inst_JMP(genJumpKindForOper(GT_NE, true), sdivLabel); // goto sdiv if Z flag is clear
+                            genJumpToThrowHlpBlk(EJ_jo, Compiler::ACK_ARITH_EXCPN);    // if the V flag is set, throw ArithmeticException
+ }
+
+ genDefineTempLabel(sdivLabel);
+ genCodeForBinary(treeNode); // Generate the sdiv instruction
+ }
+ else // (treeNode->gtOper == GT_UDIV)
+ {
+ // Only one possible exception
+ // (AnyVal / 0) => DivideByZeroException
+ //
+ // Note that division by the constant 0 was already checked for above by the op2->IsZero() check
+ //
+ if (!divisorOp->isContainedIntOrIImmed())
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, divisorReg, 0);
+ genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_DIV_BY_ZERO);
+ }
+
+ genCodeForBinary(treeNode); // Generate the udiv instruction
+ }
+ }
+ }
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(treeNode->gtGetOp1(), treeNode->gtGetOp2(), treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_VAR:
+ {
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ GenTreeLclVarCommon *lcl = treeNode->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+ emit->emitIns_R_S(ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
+ emitTypeSize(treeNode), targetReg, lcl->gtLclNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(targetReg != REG_NA);
+
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), EA_8BYTE, targetReg, varNum, offs);
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ NYI_IF(varTypeIsFloating(targetType), "Code generation for FP field assignment");
+
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+
+ emit->emitIns_R_S(ins_Store(targetType), emitTypeSize(targetType), op1->gtRegNum, varNum, offs);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ genConsumeRegs(op1);
+ if (targetReg == REG_NA)
+ {
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)), emitTypeSize(treeNode), treeNode);
+ varDsc->lvRegNum = REG_STK;
+ }
+ else // store into register (i.e move into register)
+ {
+ if (op1->isContained())
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(targetReg, targetType, op1);
+ }
+ else if (op1->gtRegNum != targetReg)
+ {
+ // Setup targetReg when op1 is not a matching register
+ assert(op1->gtRegNum != REG_NA);
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+ else
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+
+ bool movRequired = (op1->gtRegNum != retReg);
+
+ if (!movRequired)
+ {
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon *lcl = op1->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+ if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
+ {
+ assert(op1->InReg());
+
+ // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR
+
+ unsigned lclNum = lcl->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ var_types op1Type = genActualType(op1->TypeGet());
+ var_types lclType = genActualType(varDsc->TypeGet());
+
+ if (genTypeSize(op1Type) < genTypeSize(lclType))
+ {
+ movRequired = true;
+ }
+ }
+ }
+ }
+
+ if (movRequired)
+ {
+ emitAttr movSize = EA_ATTR(genTypeSize(targetType));
+ getEmitter()->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // There will be a single return block while generating profiler ELT callbacks.
+ //
+ // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
+ // In flowgraph and other places assert that the last node of a block marked as
+ // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
+ // maintain such an invariant irrespective of whether profiler hook needed or not.
+ // Also, there is not much to be gained by materializing it as an explicit node.
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ }
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode *lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+ genConsumeAddress(treeNode->AsIndir()->Addr());
+ emit->emitInsMov(ins_Load(targetType), emitTypeSize(treeNode), treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph.
+ //
+ // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end.
+ noway_assert(!"Codegen for GT_MOD/GT_UMOD");
+ break;
+
+ case GT_MATH:
+ genMathIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-ARM64-CQ: Check if we can use the currently set flags.
+ // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ assert(!op1->isContainedMemoryOp());
+ assert(!op2->isContainedMemoryOp());
+
+ genConsumeOperands(tree);
+
+ emitAttr cmpSize = EA_UNKNOWN;
+
+ if (varTypeIsFloating(op1Type))
+ {
+ assert(varTypeIsFloating(op2Type));
+ assert(!op1->isContained());
+ assert(op1Type == op2Type);
+ cmpSize = EA_ATTR(genTypeSize(op1Type));
+
+ if (op2->IsZero())
+ {
+ emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
+ }
+ else
+ {
+ assert(!op2->isContained());
+ emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+ else
+ {
+ assert(!varTypeIsFloating(op2Type));
+ // We don't support swapping op1 and op2 to generate cmp reg, imm
+ assert(!op1->isContainedIntOrIImmed());
+
+ // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero
+ // extended as part of the instruction (using "CMP (extended register)").
+ // We should use that if possible, swapping operands
+ // (and reversing the condition) if necessary.
+ unsigned op1Size = genTypeSize(op1Type);
+ unsigned op2Size = genTypeSize(op2Type);
+
+ if ((op1Size < 4) || (op1Size < op2Size))
+ {
+ // We need to sign/zero extend op1 up to 32 or 64 bits.
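+                    // Illustratively: 'sxth w1, w1' for a signed 16-bit op1.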
+ instruction ins = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum);
+ }
+
+ if (!op2->isContainedIntOrIImmed())
+ {
+ if ((op2Size < 4) || (op2Size < op1Size))
+ {
+ // We need to sign/zero extend op2 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op2Type, true);
+ inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum);
+ }
+ }
+ cmpSize = EA_4BYTE;
+ if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE))
+ {
+ cmpSize = EA_8BYTE;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = op2->AsIntConCommon();
+ emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree *cmp = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ // Get the "jmpKind" using the gtOper kind
+ // Note that whether it is an unsigned cmp is governed by the GTF_UNSIGNED flags
+
+ emitJumpKind jmpKind = genJumpKindForOper(cmp->gtOper, (cmp->gtFlags & GTF_UNSIGNED) != 0);
+ BasicBlock * jmpTarget = compiler->compCurBB->bbJumpDest;
+
+ inst_JMP(jmpKind, jmpTarget);
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
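+            // i.e., illustratively:
+            //   cmp  <dataReg>, #0
+            //   b.eq skip
+            //   bl   CORINFO_HELP_STOP_FOR_GC
+            // skip: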
+
+ GenTree *data = treeNode->gtOp.gtOp1;
+ genConsumeRegs(data);
+ emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ inst_JMP(genJumpKindForOper(GT_EQ, true), skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(treeNode->AsOp());
+
+#if NOGC_WRITE_BARRIERS
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
+
+ // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // 'data' goes into x15 (REG_WRITE_BARRIER)
+ if (data->gtRegNum != REG_WRITE_BARRIER)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet());
+ }
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
+ genGCWriteBarrier(treeNode, writeBarrierForm);
+ }
+ else
+ {
+ bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ GenTree* nonRMWsrc = nullptr;
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+ if (data->isContained() && !data->OperIsLeaf())
+ {
+ dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
+ if (!dataIsUnary)
+ {
+ nonRMWsrc = data->gtGetOp1();
+ if (nonRMWsrc->isIndir() && Lowering::IndirsAreEquivalent(nonRMWsrc, treeNode))
+ {
+ nonRMWsrc = data->gtGetOp2();
+ }
+ genConsumeRegs(nonRMWsrc);
+ }
+ }
+ else
+ {
+ genConsumeRegs(data);
+ }
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+ if (data->isContained() && !data->OperIsLeaf())
+ {
+ NYI("RMW?");
+ }
+ else
+ {
+ emit->emitInsMov(ins_Store(targetType), emitTypeSize(treeNode), treeNode);
+ }
+ }
+ }
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_SWAP:
+ {
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+
+ NYI("register swap");
+ // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+ // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask|oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+ }
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+
+ // Get argument offset on stack.
+            // Here we cross-check that the argument offset hasn't changed from lowering to codegen,
+            // since we store the arg slot number in the GT_PUTARG_STK node during lowering.
+ int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
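+            // For example, with 8-byte stack slots, gtSlotNum == 2 gives argOffset == 16.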
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtOp.gtOp1;
+ unsigned varNum;
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+            // Decide whether to set up the stack arg in the incoming or the out-going arg area.
+            // Fast tail calls are implemented as epilog+jmp, so the stack arg is set up in the incoming arg area.
+            // All other calls set up the stack arg in the out-going arg area.
+ if (putInIncomingArgArea)
+ {
+ // The first varNum is guaranteed to be the first incoming arg of the method being compiled.
+ // See lvaInitTypeRef() for the order in which lvaTable entries are initialized.
+ varNum = 0;
+#ifdef DEBUG
+#if FEATURE_FASTTAILCALL
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+ // Since it is a fast tail call, the existence of first incoming arg is guaranteed
+ // because fast tail call requires that in-coming arg area of caller is >= out-going
+ // arg area required for tail call.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ assert(varDsc != nullptr);
+ assert(varDsc->lvIsRegArg && ((varDsc->lvArgReg == REG_ARG_0) || (varDsc->lvArgReg == REG_FLTARG_0)));
+#endif // FEATURE_FASTTAILCALL
+#endif
+ }
+ else
+ {
+ varNum = compiler->lvaOutgoingArgSpaceVar;
+ }
+
+ if (data->isContained())
+ {
+ getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), varNum,
+ argOffset, (int) data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, varNum, argOffset);
+ }
+ }
+ break;
+
+ case GT_PUTARG_REG:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+
+ // commas show up here commonly, as part of a nullchk operation
+ GenTree *op1 = treeNode->gtOp.gtOp1;
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ NYI("GT_CMPXCHG");
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (targetReg != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
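+            // Emit a load that discards its result: 'ldr wzr, [reg]'.
+            // This faults if 'reg' is null, which is all the null check needs.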
+ emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0);
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+            /* Catch arguments get passed in a register. genCodeForBBlist()
+               would have marked it as holding a GC object, but not yet as used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur|gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+ emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
+ break;
+
+ case GT_COPYOBJ:
+ genCodeForCpObj(treeNode->AsCpObj());
+ break;
+
+ case GT_COPYBLK:
+ {
+ GenTreeCpBlk* cpBlkOp = treeNode->AsCpBlk();
+ if (cpBlkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+
+ switch (cpBlkOp->gtBlkOpKind)
+ {
+ case GenTreeBlkOp::BlkOpKindHelper:
+ genCodeForCpBlk(cpBlkOp);
+ break;
+ case GenTreeBlkOp::BlkOpKindUnroll:
+ genCodeForCpBlkUnroll(cpBlkOp);
+ break;
+ default:
+ unreached();
+ }
+ if (cpBlkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+ }
+ break;
+
+ case GT_INITBLK:
+ {
+ GenTreeInitBlk* initBlkOp = treeNode->AsInitBlk();
+ switch (initBlkOp->gtBlkOpKind)
+ {
+ case GenTreeBlkOp::BlkOpKindHelper:
+ genCodeForInitBlk(initBlkOp);
+ break;
+ case GenTreeBlkOp::BlkOpKindUnroll:
+ genCodeForInitBlkUnroll(initBlkOp);
+ break;
+ default:
+ unreached();
+ }
+ }
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ NYI("GT_CLS_VAR_ADDR");
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+#endif
+ assert(!"Unknown node in codegen");
+ }
+ break;
+ }
+}
+
+
+// Generate code for division (or mod) by a power of two
+// or a negative power of two (meaning -1 * a power of two, not 2^(-1)).
+// Op2 must be a contained integer constant.
+void
+CodeGen::genCodeForPow2Div(GenTreeOp* tree)
+{
+#if 0
+ GenTree *dividend = tree->gtOp.gtOp1;
+ GenTree *divisor = tree->gtOp.gtOp2;
+ genTreeOps oper = tree->OperGet();
+ emitAttr size = emitTypeSize(tree);
+ emitter *emit = getEmitter();
+ regNumber targetReg = tree->gtRegNum;
+ var_types targetType = tree->TypeGet();
+
+ bool isSigned = oper == GT_MOD || oper == GT_DIV;
+
+ // precondition: extended dividend is in RDX:RAX
+ // which means it is either all zeros or all ones
+
+ noway_assert(divisor->isContained());
+ GenTreeIntConCommon* divImm = divisor->AsIntConCommon();
+ int64_t imm = divImm->IconValue();
+ ssize_t abs_imm = abs(imm);
+ noway_assert(isPow2(abs_imm));
+
+
+ if (isSigned)
+ {
+ if (imm == 1)
+ {
+ if (targetReg != REG_RAX)
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+
+ return;
+ }
+
+ if (abs_imm == 2)
+ {
+ if (oper == GT_MOD)
+ {
+ emit->emitIns_R_I(INS_and, size, REG_RAX, 1); // result is 0 or 1
+ // xor with rdx will flip all bits if negative
+ emit->emitIns_R_R(INS_xor, size, REG_RAX, REG_RDX); // 111.11110 or 0
+ }
+ else
+ {
+ assert(oper == GT_DIV);
+ // add 1 if it's negative
+ emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
+ }
+ }
+ else
+ {
+ // add imm-1 if negative
+ emit->emitIns_R_I(INS_and, size, REG_RDX, abs_imm - 1);
+ emit->emitIns_R_R(INS_add, size, REG_RAX, REG_RDX);
+ }
+
+ if (oper == GT_DIV)
+ {
+ unsigned shiftAmount = genLog2(unsigned(abs_imm));
+ inst_RV_SH(INS_sar, size, REG_RAX, shiftAmount);
+
+ if (imm < 0)
+ {
+ emit->emitIns_R(INS_neg, size, REG_RAX);
+ }
+ }
+ else
+ {
+ assert(oper == GT_MOD);
+ if (abs_imm > 2)
+ {
+ emit->emitIns_R_I(INS_and, size, REG_RAX, abs_imm - 1);
+ }
+ // RDX contains 'imm-1' if negative
+ emit->emitIns_R_R(INS_sub, size, REG_RAX, REG_RDX);
+ }
+
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+ else
+ {
+ assert (imm > 0);
+
+ if (targetReg != dividend->gtRegNum)
+ {
+ inst_RV_RV(INS_mov, targetReg, dividend->gtRegNum, targetType);
+ }
+
+ if (oper == GT_UDIV)
+ {
+ inst_RV_SH(INS_shr, size, targetReg, genLog2(unsigned(imm)));
+ }
+ else
+ {
+ assert(oper == GT_UMOD);
+
+ emit->emitIns_R_I(INS_and, size, targetReg, imm -1);
+ }
+ }
+#else // !0
+ NYI("genCodeForPow2Div");
+#endif // !0
+}
+
+
+/***********************************************************************************************
+ * Generate code for localloc
+ */
+void
+CodeGen::genLclHeap(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_LCLHEAP);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ regNumber targetReg = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber regCnt = REG_NA;
+ regNumber pspSymReg = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+        noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+                     compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock * esp_check = genCreateTempLabel();
+ inst_JMP(genJumpKindForOper(GT_EQ, true), esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ // Whether method has PSPSym.
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+    // Compute the amount of memory to allocate, properly rounded up to STACK_ALIGN.
+ size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
+
+ // If amount is zero then return null in targetReg
+ amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
+ goto BAILOUT;
+ }
+
+        // 'amount' is the total number of bytes to localloc, rounded up to STACK_ALIGN
+ amount = AlignUp(amount, STACK_ALIGN);
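+        // For example, with STACK_ALIGN == 16, a request for 24 bytes is rounded up to 32.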
+ }
+ else
+ {
+        // If the size is 0, bail out by returning null in targetReg
+ genConsumeRegAndCopy(size, targetReg);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_TEST, easz, targetReg, targetReg);
+ inst_JMP(EJ_je, endLabel);
+
+ // Compute the size of the block to allocate and perform alignment.
+ // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt,
+ // since we don't need any internal registers.
+ if (!hasPspSym && compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ if (regCnt != targetReg)
+ inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
+ }
+
+ // Align to STACK_ALIGN
+ // regCnt will be the total number of bytes to localloc
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+
+ unsigned stackAdjustment = 0;
+#if FEATURE_EH_FUNCLETS
+ // If we have PSPsym, then need to re-locate it after localloc.
+ if (hasPspSym)
+ {
+ stackAdjustment += STACK_ALIGN;
+
+ // Save a copy of PSPSym
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~pspSymRegMask;
+ pspSymReg = genRegNumFromMask(pspSymRegMask);
+ getEmitter()->emitIns_R_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ //
+ // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following
+ // are the cases that needs to be handled:
+ // i) Method has PSPSym + out-going arg area.
+ // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs).
+    //      Therefore, we will pop off RSP up to the out-going arg area before locallocating.
+ // We need to add padding to ensure RSP is STACK_ALIGN'ed while re-locating PSPSym + arg area.
+ // ii) Method has no PSPSym but out-going arg area.
+ // Almost same case as above without the requirement to pad for the final RSP to be STACK_ALIGN'ed.
+ // iii) Method has PSPSym but no out-going arg area.
+ // Nothing to pop-off from the stack but needs to relocate PSPSym with SP padded.
+ // iv) Method has neither PSPSym nor out-going arg area.
+    //      Nothing needs to be popped off the stack or relocated.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+ // We should reach here only for non-zero, constant size allocations.
+ assert(amount > 0);
+
+ // For small allocations we will generate up to four stp instructions
+ size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ while (cntStackAlignedWidthItems != 0)
+ {
+ // We can use pre-indexed addressing.
+ // stp ZR, ZR, [SP, #-16]!
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
+ cntStackAlignedWidthItems -= 1;
+ }
+
+ goto ALLOC_DONE;
+ }
+ else if (!compiler->info.compInitMem && (amount < CORINFO_PAGE_SIZE)) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust ESP
+ // ESP might already be in the guard page, must touch it BEFORE
+ // the alloc, not after.
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
+
+ goto ALLOC_DONE;
+ }
+
+ // else, "mov regCnt, amount"
+ // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt.
+ // Since size is a constant, regCnt is not yet initialized.
+ assert(regCnt == REG_NA);
+ if (!hasPspSym && compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ }
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount)? TYP_INT : TYP_LONG);
+ }
+
+ BasicBlock* loop = genCreateTempLabel();
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+ // Since we have to zero out the allocated memory AND ensure that RSP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+ //
+        // Note: regCnt is guaranteed to be a multiple of STACK_ALIGN (16 bytes),
+        // since the localloc size is rounded up to STACK_ALIGN, so the 16-byte loop below terminates exactly.
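+        //
+        // The loop below is, illustratively:
+        //   loop:
+        //      stp  xzr, xzr, [sp, #-16]!
+        //      subs regCnt, regCnt, #16
+        //      b.ne loop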
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+ // We can use pre-indexed addressing.
+ // stp ZR, ZR, [SP, #-16]!
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
+
+        // If not done, loop.
+        // Note that regCnt is the number of bytes to stack allocate.
+        // Therefore we need to subtract 16 from regCnt here.
+ assert(genIsValidIntReg(regCnt));
+ inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type));
+ inst_JMP(EJ_jne, loop);
+ }
+ else
+ {
+        // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+        //
+        // We don't need to zero out the allocated memory. However, we do have
+        // to tickle the pages to ensure that SP is always valid and is
+        // in sync with the "stack guard page". Note that in the worst
+        // case SP is on the last byte of the guard page. Thus you must
+        // touch SP+0 first, not SP+0x1000.
+        //
+        // Another subtlety is that you don't want SP to be exactly on the
+        // boundary of the guard page because a push is pre-decrement, thus
+        // call setup would not touch the guard page but just beyond it.
+        //
+        // Note that we go through a few hoops so that SP never points to
+        // illegal pages at any time during the tickling process:
+        //
+        //       neg   regCnt
+        //       add   regCnt, SP       // regCnt now holds the ultimate SP
+        //       b.lo  loop             // result is smaller than original SP (no wrap around)
+        //       mov   regCnt, #0       // Overflow, pick lowest possible number
+        //  loop:
+        //       ldr   wzr, [SP + 0]    // tickle the page
+        //       mov   regTmp, SP
+        //       sub   regTmp, PAGE_SIZE
+        //       mov   SP, regTmp
+        //       cmp   SP, regCnt
+        //       b.hs  loop
+        //
+        //       mov   SP, regCnt
+        //  end:
+        inst_RV(INS_neg, regCnt, TYP_I_IMPL); // ARM64 'neg' (INS_NEG is the x86 mnemonic)
+ inst_RV_RV(INS_adds, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+        // Tickle the decremented value, and move back to SP;
+        // note that it has to be done BEFORE the update of SP since
+        // SP might already be on the guard page. It is OK to leave
+        // the final value of SP on the guard page.
+        // Probe the page via a load into zr.
+        getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0);
+
+        // This is a harmless workaround to avoid the emitter trying to track the
+        // decrement of SP - we do the subtraction in another reg instead
+        // of adjusting SP directly.
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
+ inst_RV_IV(INS_sub, regTmp, CORINFO_PAGE_SIZE, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
+
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jae, loop);
+
+        // Move the final value to SP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
+
+ALLOC_DONE:
+ // Re-adjust SP to allocate PSPSym and out-going arg area
+ if (stackAdjustment != 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert(stackAdjustment > 0);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int) stackAdjustment);
+
+#if FEATURE_EH_FUNCLETS
+ // Write PSPSym to its new location.
+ if (hasPspSym)
+ {
+ assert(genIsValidIntReg(pspSymReg));
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+        // Return the stackalloc'ed address in the result register:
+        // targetReg = SP + stackAdjustment.
+        //
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int) stackAdjustment);
+ }
+ else // stackAdjustment == 0
+ {
+ // Move the final value of SP to targetReg
+ inst_RV_RV(INS_mov, targetReg, REG_SPBASE);
+ }
+
+BAILOUT:
+ if (endLabel != nullptr)
+ genDefineTempLabel(endLabel);
+
+    // Write the localloc SP value to the lvaLocAllocSPvar stack frame slot
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+    // Update the saved SP value used by the return stack-check
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC && compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister && compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ genProduceReg(tree);
+}
+
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlkUnroll(GenTreeInitBlk* initBlkNode)
+{
+#if 0
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ GenTreePtr blockSize = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Dest();
+ GenTreePtr initVal = initBlkNode->InitVal();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(blockSize->isContained());
+
+ assert(blockSize->IsCnsIntOrI());
+#endif // DEBUG
+
+ size_t size = blockSize->gtIntCon.gtIconVal;
+
+ assert(size <= INITBLK_UNROLL_LIMIT);
+ assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
+
+ emitter *emit = getEmitter();
+
+ genConsumeReg(initVal);
+ genConsumeReg(dstAddr);
+
+ // If the initVal was moved, or spilled and reloaded to a different register,
+ // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
+ // which needs to be the new register.
+ regNumber valReg = initVal->gtRegNum;
+ initVal = initVal->gtSkipReloadOrCopy();
+#else // !0
+ NYI("genCodeForInitBlkUnroll");
+#endif // !0
+}
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+// a) The size argument of the InitBlk is not an integer constant, or
+// b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeInitBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk operation in the right registers
+ GenTreePtr blockSize = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Dest();
+ GenTreePtr initVal = initBlkNode->InitVal();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(!blockSize->isContained());
+
+ // TODO-ARM64-CQ: When initblk loop unrolling is implemented
+ // put this assert back on.
+#if 0
+ if (blockSize->IsCnsIntOrI())
+ {
+ assert(blockSize->gtIntCon.gtIconVal >= INITBLK_UNROLL_LIMIT);
+ }
+#endif // 0
+#endif // DEBUG
+
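+    // The helper follows the memset(dst, val, size) convention, so the operands are placed
+    // in the fixed ABI arg registers (illustrative mapping): dstAddr -> REG_ARG_0,
+    // initVal -> REG_ARG_1, blockSize -> REG_ARG_2.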
+ genConsumeRegAndCopy(blockSize, REG_ARG_2);
+ genConsumeRegAndCopy(initVal, REG_ARG_1);
+ genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+
+// Generate code for a load from some address + offset
+// base: tree node which can be either a local address or arbitrary node
+// offset: distance from the base from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+{
+#if 0
+ emitter *emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+ emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_R_AR(ins, size, dst, base->gtRegNum, offset);
+ }
+#else // !0
+ NYI("genCodeForLoadOffset");
+#endif // !0
+}
+
+// Generate code for a store to some address + offset
+// base: tree node which can be either a local address or arbitrary node
+// offset: distance from the base at which to store
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
+{
+#if 0
+ emitter *emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+ emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+ }
+#else // !0
+ NYI("genCodeForStoreOffset");
+#endif // !0
+}
+
+
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+void CodeGen::genCodeForCpBlkUnroll(GenTreeCpBlk* cpBlkNode)
+{
+#if 0
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Dest();
+ GenTreePtr srcAddr = cpBlkNode->Source();
+
+ assert(blockSize->IsCnsIntOrI());
+ size_t size = blockSize->gtIntCon.gtIconVal;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter *emit = getEmitter();
+
+ if (!srcAddr->isContained())
+ genConsumeReg(srcAddr);
+
+ if (!dstAddr->isContained())
+ genConsumeReg(dstAddr);
+
+ unsigned offset = 0;
+
+ // If the size of this struct is larger than 16 bytes
+ // let's use SSE2 to be able to do 16 byte at a time
+ // loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(cpBlkNode->gtRsvdRegs) == 1);
+ regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+#else // !0
+ NYI("genCodeForCpBlkUnroll");
+#endif // !0
+}
+
+// Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// For this case we'll generate a sequence of loads/stores in the case of struct
+// slots that don't contain GC pointers. The generated code will look like:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+//
+// In the case of a GC pointer we'll call the ByRef write barrier helper,
+// which happens to use the same registers as the previous call, to maintain
+// the same register requirements and register killsets:
+// bl CORINFO_HELP_ASSIGN_BYREF
+//
+// So finally an example would look like this:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+void CodeGen::genCodeForCpObj(GenTreeCpObj* cpObjNode)
+{
+ // Make sure we got the arguments of the cpobj operation in the right registers
+ GenTreePtr clsTok = cpObjNode->ClsTok();
+ GenTreePtr dstAddr = cpObjNode->Dest();
+ GenTreePtr srcAddr = cpObjNode->Source();
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!srcAddr->isContained());
+
+ // This GenTree node has data about GC pointers, this means we're dealing
+ // with CpObj.
+ assert(cpObjNode->gtGcPtrCount > 0);
+#endif // DEBUG
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumeRegAndCopy(srcAddr, REG_WRITE_BARRIER_SRC_BYREF);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddr->TypeGet());
+
+ genConsumeRegAndCopy(dstAddr, REG_WRITE_BARRIER_DST_BYREF);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
+
+ // Temp register used to perform the sequence of loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs);
+
+#ifdef DEBUG
+ assert(cpObjNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(cpObjNode->gtRsvdRegs) == 1);
+ assert(genIsValidIntReg(tmpReg));
+#endif // DEBUG
+
+ unsigned slots = cpObjNode->gtSlots;
+ emitter *emit = getEmitter();
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ if (dstOnStack)
+ {
+ // TODO-ARM64-CQ: Consider using LDP/STP to save codesize.
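+        // Each iteration emits a post-indexed load/store pair, e.g. (illustrative):
+        //      ldr tempReg, [srcByrefReg], #8
+        //      str tempReg, [dstByrefReg], #8
+        // so both byref cursors advance by TARGET_POINTER_SIZE automatically.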
+ while (slots > 0)
+ {
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ slots--;
+ }
+ }
+ else
+ {
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+                // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contiguous NON-GC slots.
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE, INS_OPTS_POST_INDEX);
+ break;
+
+ default:
+                // We have a GC pointer; call the byref write barrier helper.
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
+ gcPtrCount--;
+ break;
+ }
+ ++i;
+ }
+ assert(gcPtrCount == 0);
+ }
+
+ // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
+}
+
+// Generate code for a CpBlk node by the means of the VM memcpy helper call
+// Preconditions:
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeCpBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Dest();
+ GenTreePtr srcAddr = cpBlkNode->Source();
+
+ assert(!dstAddr->isContained());
+ assert(!srcAddr->isContained());
+ assert(!blockSize->isContained());
+
+ // Enable this when we support cpblk loop unrolling.
+#if 0
+#ifdef DEBUG
+ if (blockSize->IsCnsIntOrI())
+ {
+ assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
+ }
+#endif // DEBUG
+#endif // 0
+
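+    // The helper follows the memcpy(dst, src, size) convention, so the operands are placed
+    // in the fixed ABI arg registers (illustrative mapping): dstAddr -> REG_ARG_0,
+    // srcAddr -> REG_ARG_1, blockSize -> REG_ARG_2.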
+ genConsumeRegAndCopy(blockSize, REG_ARG_2);
+ genConsumeRegAndCopy(srcAddr, REG_ARG_1);
+ genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+}
+
+
+// generate code to do a switch statement based on a table of ip-relative offsets
+void
+CodeGen::genTableBasedSwitch(GenTree* treeNode)
+{
+ NYI("Emit table based switch");
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // load the ip-relative offset (which is relative to start of fgFirstBB)
+ //getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
+
+ // add it to the absolute address of fgFirstBB
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ //getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
+ //getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
+ // jmp baseReg
+ // getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
+}
+
+// emits the table and an instruction to get the address of the first element
+void
+CodeGen::genJumpTable(GenTree* treeNode)
+{
+ NYI("Emit Jump table");
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabOffs;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
+
+ jmpTabOffs = 0;
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+    for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+    }
+
+ getEmitter()->emitDataGenEnd();
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ getEmitter()->emitIns_R_C(INS_lea,
+ emitTypeSize(TYP_I_IMPL),
+ treeNode->gtRegNum,
+ compiler->eeFindJitDataOffs(jmpTabBase),
+ 0);
+ genProduceReg(treeNode);
+}
+
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void
+CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+#if 0
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ instruction ins;
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
+ genConsumeReg(addr);
+
+ // The register allocator should have extended the lifetime of the address
+ // so that it is not used as the target.
+ noway_assert(addrReg != targetReg);
+
+ // If data is a lclVar that's not a last use, we'd better have allocated a register
+ // for the result (except in the case of GT_LOCKADD which does not produce a register result).
+ assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0);
+
+ genConsumeIfReg(data);
+ if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ data->gtRegNum = targetReg;
+
+ // TODO-ARM64-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
+ // original gtRegNum on data, after calling emitInsBinary below.
+ }
+ switch (treeNode->OperGet())
+ {
+ case GT_LOCKADD:
+ instGen(INS_lock);
+ ins = INS_add;
+ break;
+ case GT_XCHG:
+ // lock is implied by xchg
+ ins = INS_xchg;
+ break;
+ case GT_XADD:
+ instGen(INS_lock);
+ ins = INS_xadd;
+ break;
+ default:
+ unreached();
+ }
+ getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+#else // !0
+ NYI("genLockedInstructions");
+#endif // !0
+}
+
+
+// generate code for BoundsCheck nodes
+void
+CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ GenTree *src1, *src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrLen);
+ genConsumeRegs(arrIndex);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = EJ_jbe;
+ }
+ else
+ {
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = EJ_jae;
+ }
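+
+    // For example (illustrative): with a contained constant index we compare the array
+    // length against the immediate and throw on below-or-equal (EJ_jbe); with both values
+    // in registers we compare the index against the length and throw on above-or-equal
+    // (EJ_jae). Both are unsigned compares, so a negative index also fails the check.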
+
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src2->isContainedIntOrIImmed())
+ {
+ intConst = src2->AsIntConCommon();
+ }
+
+ if (intConst != nullptr)
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ genJumpToThrowHlpBlk(jmpKind, Compiler::ACK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned
+CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
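+    // For example (illustrative): for a rank-2 array, the lower bound of dimension 1 is at
+    // eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (1 + 2).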
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+//    size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+//    dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned
+CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+
+void
+CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+#if 0
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+
+ noway_assert(tgtReg != REG_NA);
+
+ // Subtract the lower bound for this dimension.
+ // TODO-ARM64-CQ: make this contained if it's an immediate that fits.
+ if (tgtReg != indexReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
+ }
+ getEmitter()->emitIns_R_AR(INS_sub,
+ emitActualTypeSize(TYP_INT),
+ tgtReg,
+ arrReg,
+ genOffsetOfMDArrayLowerBound(elemType, rank, dim));
+ getEmitter()->emitIns_R_AR(INS_cmp,
+ emitActualTypeSize(TYP_INT),
+ tgtReg,
+ arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ genJumpToThrowHlpBlk(EJ_jae, Compiler::ACK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+#else // !0
+ NYI("genCodeForArrIndex");
+#endif // !0
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
+
+void
+CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+#if 0
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+
+ // We will use a temp register for the offset*scale+effectiveIndex computation.
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+
+ if (!offsetNode->IsZero())
+ {
+ // Evaluate tgtReg = offsetReg*dim_size + indexReg.
+ // tmpReg is used to load dim_size and the result of the multiplication.
+ // Note that dim_size will never be negative.
+ regNumber offsetReg = genConsumeReg(offsetNode);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber arrReg = genConsumeReg(arrObj);
+
+ getEmitter()->emitIns_R_AR(INS_mov,
+ emitActualTypeSize(TYP_INT),
+ tmpReg,
+ arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ inst_RV_RV(INS_imul, tmpReg, offsetReg);
+
+ if (tmpReg == tgtReg)
+ {
+ inst_RV_RV(INS_add, tmpReg, indexReg);
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
+ }
+ inst_RV_RV(INS_add, tgtReg, tmpReg);
+ }
+ }
+ else
+ {
+ regNumber indexReg = genConsumeReg(indexNode);
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+#else // !0
+ NYI("genCodeForArrOffset");
+#endif // !0
+}
+
+// make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen
+//
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree *base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree *)(-1);
+ return i;
+}
+
+// make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate
+//
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree *)(-1);
+ return i;
+}
+
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins = INS_brk;
+
+ if (varTypeIsFloating(type))
+ {
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_fadd;
+ break;
+ case GT_SUB:
+ ins = INS_fsub;
+ break;
+ case GT_MUL:
+ ins = INS_fmul;
+ break;
+ case GT_DIV:
+ ins = INS_fdiv;
+ break;
+ case GT_NEG:
+ ins = INS_fneg;
+ break;
+
+ default:
+ NYI("Unhandled oper in genGetInsForOper() - float");
+ unreached();
+ break;
+ }
+ }
+ else
+ {
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_and;
+ break;
+ case GT_DIV:
+ ins = INS_sdiv;
+ break;
+ case GT_UDIV:
+ ins = INS_udiv;
+ break;
+ case GT_MUL:
+ ins = INS_mul;
+ break;
+ case GT_LSH:
+ ins = INS_lsl;
+ break;
+ case GT_NOT:
+ ins = INS_mvn;
+ break;
+ case GT_OR:
+ ins = INS_orr;
+ break;
+ case GT_RSH:
+ ins = INS_asr;
+ break;
+ case GT_RSZ:
+ ins = INS_lsr;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_eor;
+ break;
+
+ default:
+ NYI("Unhandled oper in genGetInsForOper() - integer");
+ unreached();
+ break;
+ }
+ }
+ return ins;
+}
+
+/** Generates the code sequence for a GenTree node that
+ * represents a bit shift operation (<<, >>, >>>).
+ *
+ * Arguments: operand: the value to be shifted by shiftBy bits.
+ * shiftBy: the number of bits to shift the operand.
+ * parent: the actual bitshift node (that specifies the
+ *                  type of bitshift to perform).
+ *
+ * Preconditions: a) All GenTrees are register allocated.
+ * b) Either shiftBy is a contained constant or
+ *                      it's an expression sitting in a register.
+ * c) The actual bit shift node is not stack allocated
+ * nor contained (not yet supported).
+ */
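+// For example (illustrative): a GT_LSH of an int by the constant 3 emits 'lsl wD, wN, #3',
+// while a variable shift amount emits the register form 'lsl wD, wN, wM'.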
+void CodeGen::genCodeForShift(GenTreePtr operand,
+ GenTreePtr shiftBy,
+ GenTreePtr parent)
+{
+ var_types targetType = parent->TypeGet();
+ genTreeOps oper = parent->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr size = emitTypeSize(parent);
+
+ assert(parent->gtRegNum != REG_NA);
+ genConsumeReg(operand);
+
+ if (!shiftBy->IsCnsIntOrI())
+ {
+ genConsumeReg(shiftBy);
+ getEmitter()->emitIns_R_R_R(ins, size, parent->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(ins, size, parent->gtRegNum, operand->gtRegNum, shiftBy->gtIntCon.gtIconVal);
+ }
+
+ genProduceReg(parent);
+}
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genUnspillRegIfNeeded(GenTree *tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+
+ GenTree* unspillTree = tree;
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+ if (unspillTree->gtFlags & GTF_SPILLED)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins_Load(unspillTree->gtType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // This code exists in other CodeGen*.cpp files.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.rsMaskVars |= genGetRegMask(varDsc);
+ }
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType),
+ emitActualTypeSize(unspillTree->gtType),
+ dstReg,
+ t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+}
+
+// Do the liveness update for a subnode that is being consumed by codegen,
+// including the logic for reloading it if needed, and also take care
+// of placing the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree *tree, regNumber needReg)
+{
+ regNumber treeReg = genConsumeReg(tree);
+ if (treeReg != needReg)
+ {
+ var_types targetType = tree->TypeGet();
+ inst_RV_RV(ins_Copy(targetType), needReg, treeReg, targetType);
+ }
+}
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ // Check whether this node and the node from which we're copying the value have the same
+ // register type.
+    // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
+    // register, in which case it is passed as an argument, or returned from a call,
+    // in an integer register and must be copied if it's in a floating-point/SIMD register.
+
+ if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
+ {
+#if 0
+ instruction ins;
+ regNumber fpReg;
+ regNumber intReg;
+ if(varTypeIsFloating(treeNode))
+ {
+ ins = INS_mov_i2xmm;
+ fpReg = targetReg;
+ intReg = op1->gtRegNum;
+ }
+ else
+ {
+ ins = INS_mov_xmm2i;
+ intReg = targetReg;
+ fpReg = op1->gtRegNum;
+ }
+ inst_RV_RV(ins, fpReg, intReg, targetType);
+#else
+ NYI_ARM64("CodeGen - FP/Int RegCopy");
+#endif
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+ genProduceReg(treeNode);
+}
+
+// Do liveness update for a subnode that is being consumed by codegen.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+regNumber CodeGen::genConsumeReg(GenTree *tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if ((varDsc->lvRegNum != REG_STK) && (varDsc->lvRegNum != tree->gtRegNum))
+ {
+ inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+ assert(tree->gtRegNum != REG_NA);
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon *lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+ else if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+}
+
+// do liveness update for the base and index operands of an address mode
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode *addr)
+{
+ if (addr->Base())
+ genConsumeReg(addr->Base());
+ if (addr->Index())
+ genConsumeReg(addr->Index());
+}
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+ if (tree->isContained())
+ {
+ if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+ else
+ {
+ assert(tree->OperIsLeaf());
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+// do liveness update for register produced by the current node in codegen
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genProduceReg(GenTree *tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree, tree->gtRegNum);
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+ // we must be loading it as a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) ||
+ ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+
+// generates an ip-relative call or indirect call via reg ('blr reg')
+// pass in 'addr' for a relative call or 'base' for an indirect register call
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+// TODO-Cleanup: move to CodeGenCommon.cpp
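+// For example (illustrative), genCallInstruction below uses:
+//   genEmitCall(emitter::EC_FUNC_TOKEN, ..., addr, ...)       for a direct 'bl <addr>'
+//   genEmitCall(emitter::EC_INDIR_R, ..., nullptr, ..., reg)  for an indirect 'blr <reg>'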
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
+ void* addr,
+ emitAttr retSize,
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ addr,
+ 0,
+ retSize,
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ ilOffset,
+ base, REG_NA, 0, 0,
+ isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// generates an indirect call via addressing mode (call []) given an indir node
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo)
+ GenTreeIndir* indir,
+ emitAttr retSize,
+ IL_OFFSETX ilOffset)
+{
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType),
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr,
+ 0,
+ retSize,
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ ilOffset,
+ indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA,
+ indir->Scale(),
+ indir->Offset());
+}
+
+// Produce code for a GT_CALL node
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ GenTreeCall *call = node->AsCall();
+
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert (!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+
+ // In the case of a varargs call,
+ // the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the
+ // integer and floating point registers so, let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ NYI_ARM64("CodeGen - IsVarargs");
+ }
+ }
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
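+        // Dereference 'this' into zr: this faults on a null 'this' without
+        // needing a result register, and the loaded value is discarded.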
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+    // If fast tail call, then we are done. In this case we setup the args (both reg args
+    // and stack args in incoming arg area) and the call target. The epilog sequence would
+    // then generate "br <target reg>".
+ if (call->IsFastTailCall())
+ {
+ NYI_ARM64("CodeGen - IsFastTailCall");
+
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+#if 0
+ if (target->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
+ }
+#endif
+ return;
+ }
+
+    // For a pinvoke to unmanaged code we emit a label to clear
+    // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size.
+ emitAttr retSize = EA_PTRSIZE;
+ if (call->gtType == TYP_REF ||
+ call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif // DEBUGGING_SUPPORT
+
+ if (target != nullptr)
+ {
+        // For Arm64 a call target cannot be a contained indirection
+ assert(!target->isContainedIndir());
+
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ //
+ assert(genIsValidIntReg(target->gtRegNum));
+
+ genEmitCall(emitter::EC_INDIR_R,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr, //addr
+ retSize,
+ ilOffset,
+ genConsumeReg(target));
+ }
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void *addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void *pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void **)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+            if (!call->NeedsNullCheck())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ addr = addrInfo.addr;
+ }
+#if 0
+ // Use this path if you want to load an absolute call target using
+ // a sequence of movs followed by an indirect call (blr instruction)
+
+ // Load the call target address in x16
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
+
+ // indirect call to constant address in IP0
+ genEmitCall(emitter::EC_INDIR_R,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr, //addr
+ retSize,
+ ilOffset,
+ REG_IP0);
+#else
+ // Non-virtual direct call to known addresses
+ genEmitCall(emitter::EC_FUNC_TOKEN,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ addr,
+ retSize,
+ ilOffset);
+#endif
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+    // TODO-ARM64-Bug?: As a matter of fact, shouldn't we be killing all of the callee-trashed regs here?
+    // For now we will assert that, other than the arg regs, the gc ref/byref sets don't contain any
+    // other registers from RBM_CALLEE_TRASH.
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+ regNumber returnReg = (varTypeIsFloating(returnType) ? REG_FLOATRET : REG_INTRET);
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ genProduceReg(call);
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+}
+
+// Produce code for a GT_JMP node.
+// The arguments of the caller need to be transferred to the callee before exiting caller.
+// The actual jump to callee is generated as part of caller epilog sequence.
+// Therefore the codegen of GT_JMP is to ensure that the callee arguments are correctly setup.
+void CodeGen::genJmpMethod(GenTreePtr jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+ assert(compiler->compJmpOpUsed);
+
+ // If no arguments, nothing to do
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+#if 0
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // First move any en-registered stack arguments back to the stack.
+ // At the same time any reg arg not in correct reg is moved back to its stack location.
+ //
+ // We are not strictly required to spill reg args that are not in the desired reg for a jmp call
+ // But that would require us to deal with circularity while moving values around. Spilling
+ // to stack makes the implementation simple, which is not a bad trade off given Jmp calls
+ // are not frequent.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
+ {
+ // Skip reg args which are already in its right register for jmp call.
+ // If not, we will spill such args to their stack locations.
+ //
+ // If we need to generate a tail call profiler hook, then spill all
+ // arg regs to free them up for the callback.
+ if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
+ continue;
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // Skip args which are currently living in stack.
+ continue;
+ }
+
+ // If we came here it means either a reg argument not in the right register or
+ // a stack argument currently living in a register. In either case the following
+ // assert should hold.
+ assert(varDsc->lvRegNum != REG_STK);
+
+ var_types loadType = varDsc->lvaArgType();
+ getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
+
+ // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of varDsc->lvRegNum.
+ regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
+ regSet.rsMaskVars &= ~tempMask;
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // At this point all arg regs are free.
+ // Emit tail call profiler callback.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+ // Next move any un-enregistered register arguments back to their register.
+ regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ // Skip if arg not passed in a register.
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.rsMaskVars |= genRegMask(argReg);
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+
+ // In case of a jmp call to a vararg method also pass the float/double arg in the corresponding int arg register.
+ if (compiler->info.compIsVarArgs)
+ {
+ regNumber intArgReg;
+ if (varTypeIsFloating(loadType))
+ {
+ intArgReg = compiler->getCallArgIntRegister(argReg);
+ inst_RV_RV(INS_mov_xmm2i, argReg, intArgReg, loadType);
+ }
+ else
+ {
+ intArgReg = argReg;
+ }
+
+ fixedIntArgMask |= genRegMask(intArgReg);
+
+ if (intArgReg == REG_ARG_0)
+ {
+ assert(firstArgVarNum == BAD_VAR_NUM);
+ firstArgVarNum = varNum;
+ }
+ }
+ }
+
+ // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
+ // load the remaining arg registers (both int and float) from the corresponding
+ // shadow stack slots. This is for the reason that we don't know the number and type
+ // of non-fixed params passed by the caller, therefore we have to assume the worst case
+ // of caller passing float/double args both in int and float arg regs.
+ //
+ // The caller could have passed gc-ref/byref type var args. Since these are var args
+    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
+ // remaining arg registers from shadow stack slots as non-gc interruptible.
+ if (fixedIntArgMask != RBM_NONE)
+ {
+ assert(compiler->info.compIsVarArgs);
+ assert(firstArgVarNum != BAD_VAR_NUM);
+
+ regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ if (remainingIntArgMask != RBM_NONE)
+ {
+ getEmitter()->emitDisableGC();
+ for (int argNum = 0, argOffset=0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+ regMaskTP argRegMask = genRegMask(argReg);
+
+ if ((remainingIntArgMask & argRegMask) != 0)
+ {
+ remainingIntArgMask &= ~argRegMask;
+ getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
+
+                    // also load it in the corresponding float arg reg
+ regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
+ inst_RV_RV(INS_mov_i2xmm, floatReg, argReg);
+ }
+
+ argOffset += REGSIZE_BYTES;
+ }
+ getEmitter()->emitEnableGC();
+ }
+ }
+#else // !0
+ NYI("genJmpMethod");
+#endif // !0
+}
+
+// Produce code for a GT_LEA subnode.
+void CodeGen::genLeaInstruction(GenTreeAddrMode *lea)
+{
+ genConsumeOperands(lea);
+ emitter *emit = getEmitter();
+ emitAttr size = emitTypeSize(lea);
+
+    // On ARM64 we can only load addresses of the form:
+ //
+ // [Base + index*scale]
+ // [Base + Offset]
+ // [Literal] (PC-Relative)
+ //
+ // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
+ // destReg = baseReg + indexReg * scale;
+ // destReg = destReg + offset;
+ //
+ // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
+ // addressing mode instruction. Currently we're 'cheating' by producing one or more
+    //                  instructions to generate the addressing mode, so we need to modify lowering to
+    //                  produce LEAs that map 1:1 to the addressing modes the ARM64 architecture supports.
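+    //
+    // For example (an illustrative sketch; register numbers are arbitrary), a LEA of the
+    // form [x1 + x2*8 + 24] would emit:
+    //     add     x0, x1, x2, LSL #3
+    //     add     x0, x0, #24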
+ if (lea->Base() && lea->Index())
+ {
+ DWORD lsl;
+
+ assert(isPow2(lea->gtScale));
+ BitScanForward(&lsl, lea->gtScale);
+
+ assert(lsl <= 4);
+
+ // First, generate code to load rd = [base + index*scale]
+ if (lsl > 0)
+ {
+ emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Index()->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ else
+ {
+ emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->Index()->gtRegNum);
+ }
+ // If the offset is not zero, then compute rd = [rd + offset]
+ if (lea->gtOffset != 0)
+ {
+ emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, lea->gtRegNum, (int) lea->gtOffset);
+ }
+ }
+ else if (lea->Base())
+ {
+ if (lea->gtOffset != 0)
+ {
+ emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, lea->Base()->gtRegNum, (int) lea->gtOffset);
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, lea->Base()->gtRegNum);
+ }
+ }
+ else if (lea->Index())
+ {
+        // If we encounter a GT_LEA node without a base, it means it was produced while
+        // attempting to optimize an arbitrary arithmetic expression during lowering.
+        // This is currently disabled on ARM64, since we need to adjust lowering to
+        // account for the simpler instructions that ARM64 supports.
+ // TODO-ARM64-CQ: Fix this and let LEA optimize arithmetic trees too.
+ assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
+ }
+
+ genProduceReg(lea);
+}
+
+// Generate code to materialize a condition into a register
+// (the condition codes must already have been appropriately set)
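+//
+// For example (illustrative): for a signed GT_LT whose flags were set by a preceding
+// compare, inst_SET materializes the condition as a conditional set such as "cset w0, lt".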
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ // Get the "jmpKind" using the gtOper kind
+    // Note that whether it is an unsigned cmp is governed by the GTF_UNSIGNED flag
+
+ emitJumpKind jmpKind = genJumpKindForOper(tree->gtOper, (tree->gtFlags & GTF_UNSIGNED) != 0);
+
+ inst_SET(jmpKind, dstReg);
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+// This method handles integer overflow checking casts
+// as well as ordinary integer casts.
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode is not a contained node and must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
+//
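+// Notes:
+//    As an illustrative sketch (register numbers and label names are arbitrary), an
+//    overflow-checking cast from TYP_INT to TYP_BYTE emits a range check along the lines of:
+//        cmp     w8, #127              // typeMax (SCHAR_MAX)
+//        b.gt    <throwOverflowBlock>
+//        cmp     w8, #-128             // typeMin (SCHAR_MIN)
+//        b.lt    <throwOverflowBlock>
+//        mov     w0, w8
+//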
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ emitter * emit = getEmitter();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = genActualType(castOp->TypeGet());
+ emitAttr movSize = emitActualTypeSize(dstType);
+ bool movRequired = false;
+
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ bool isUnsignedSrc = varTypeIsUnsigned(srcType);
+
+ bool requiresOverflowCheck = false;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+
+ // If necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set.
+ if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ isUnsignedSrc = true;
+ }
+
+ if (treeNode->gtOverflow() && (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG)))
+ {
+ requiresOverflowCheck = true;
+ }
+
+ genConsumeReg(castOp);
+
+ if (requiresOverflowCheck)
+ {
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool signCheckOnly = false;
+
+ /* Do we need to compare the value, or just check masks */
+
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF00000000LL;
+ }
+ break;
+
+ case TYP_LONG:
+ noway_assert(srcType == TYP_ULONG);
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
+ signCheckOnly = true;
+ break;
+
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ if (signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
+ genJumpToThrowHlpBlk(EJ_jl, Compiler::ACK_OVERFLOW);
+ if (dstType == TYP_ULONG)
+ {
+ // cast to TYP_ULONG:
+ // We use a mov with size=EA_4BYTE
+ // which will zero out the upper bits
+ movSize = EA_4BYTE;
+ movRequired = true;
+ }
+ }
+ else
+ {
+ // When we are converting from/to unsigned,
+ // we only have to check for any bits set in 'typeMask'
+ if (isUnsignedSrc || isUnsignedDst)
+ {
+ noway_assert(typeMask != 0);
+ emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, typeMask);
+ genJumpToThrowHlpBlk(EJ_jne, Compiler::ACK_OVERFLOW);
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+            // We must check that the value is in the signed range.
+
+ // Compare with the MAX
+
+ noway_assert((typeMin != 0) && (typeMax != 0));
+
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, typeMax);
+ genJumpToThrowHlpBlk(EJ_jg, Compiler::ACK_OVERFLOW);
+
+ // Compare with the MIN
+
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, typeMin);
+ genJumpToThrowHlpBlk(EJ_jl, Compiler::ACK_OVERFLOW);
+ }
+ }
+ ins = INS_mov;
+ }
+ else // Non-overflow checking cast.
+ {
+ if (genTypeSize(srcType) == genTypeSize(dstType))
+ {
+ ins = INS_mov;
+ }
+ else
+ {
+ var_types extendType;
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ extendType = srcType;
+ if (srcType == TYP_UINT)
+ {
+ movSize = EA_4BYTE; // force a mov EA_4BYTE to zero the upper bits
+ movRequired = true;
+ }
+ }
+ else // (genTypeSize(srcType) > genTypeSize(dstType))
+ {
+ extendType = dstType;
+ if (dstType == TYP_INT)
+ {
+ movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
+ }
+ }
+
+ ins = ins_Move_Extend(extendType, castOp->InReg());
+ }
+ }
+
+ if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ {
+ emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double or vice versa.
+//
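+// Notes:
+//    On ARM64 this is a single fcvt instruction, e.g. (illustrative):
+//        fcvt    d0, s0        // float  -> double
+//        fcvt    s0, d0        // double -> float
+//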
+void
+CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+ assert(srcType != dstType); // Must specify two different types
+
+ insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
+ : INS_OPTS_D_TO_S; // convert Double to Single
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // treeNode must be a reg
+ assert(!treeNode->isContained());
+
+ getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
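+// Notes:
+//    Illustrative examples of the emitted conversion (register numbers are arbitrary):
+//        scvtf   d0, w0        // signed int32   -> double
+//        ucvtf   s0, x0        // unsigned int64 -> float
+//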
+void
+CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ // int type --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+    // Force the srcType to unsigned if the GT_UNSIGNED flag is set.
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+    // We should never see a srcType whose size is neither EA_4BYTE nor EA_8BYTE.
+ // For conversions from small types (byte/sbyte/int16/uint16) to float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+    noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
+
+    instruction ins = INS_scvtf; // default to signed converts
+ insOpts cvtOption = INS_OPTS_NONE; // invalid value
+
+    // Note: the signedness of the conversion is determined by the (possibly GTF_UNSIGNED-adjusted)
+    // source type; dstType is a floating point type and is never unsigned.
+    if (varTypeIsUnsigned(srcType))
+    {
+        ins = INS_ucvtf; // use unsigned converts
+    }
+
+ if (dstType == TYP_DOUBLE)
+ {
+ if (srcSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_4BYTE_TO_D;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ cvtOption = INS_OPTS_8BYTE_TO_D;
+ }
+ }
+ else
+ {
+ assert(dstType == TYP_FLOAT);
+ if (srcSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_4BYTE_TO_S;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ cvtOption = INS_OPTS_8BYTE_TO_S;
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
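+// Notes:
+//    Illustrative examples of the emitted conversion (register numbers are arbitrary):
+//        fcvtzs  w0, d0        // double -> signed int32   (round toward zero)
+//        fcvtzu  x0, s0        // float  -> unsigned int64 (round toward zero)
+//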
+void
+CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+    // We don't expect to see overflow-detecting float/double --> int type conversions here,
+    // as they should have been converted into helper calls by the front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+    // We should never see a dstType whose size is neither EA_4BYTE nor EA_8BYTE.
+ // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+    noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
+
+    instruction ins = INS_fcvtzs; // default to signed converts
+ insOpts cvtOption = INS_OPTS_NONE; // invalid value
+
+ if (varTypeIsUnsigned(dstType))
+ {
+ ins = INS_fcvtzu; // use unsigned converts
+ }
+
+ if (srcType == TYP_DOUBLE)
+ {
+ if (dstSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_D_TO_4BYTE;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ cvtOption = INS_OPTS_D_TO_8BYTE;
+ }
+ }
+ else
+ {
+ assert(srcType == TYP_FLOAT);
+ if (dstSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_S_TO_4BYTE;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ cvtOption = INS_OPTS_S_TO_8BYTE;
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCkfinite: Generate code for ckfinite opcode.
+//
+// Arguments:
+// treeNode - The GT_CKFINITE node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// GT_CKFINITE node has reserved an internal register.
+//
+// TODO-ARM64-CQ - mark the operand as contained if known to be in
+// memory (e.g. field or an array element).
+//
+void
+CodeGen::genCkfinite(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CKFINITE);
+
+#if 0
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ var_types targetType = treeNode->TypeGet();
+ int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
+
+ // Extract exponent into a register.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ inst_RV_RV(INS_mov_xmm2i, genConsumeReg(op1), tmpReg, targetType);
+ if (targetType == TYP_DOUBLE)
+ {
+ // right shift by 32 bits to get to exponent.
+ inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
+ }
+
+    // Mask the exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, Compiler::ACK_ARITH_EXCPN);
+
+    // If it is a finite value, copy it to targetReg
+ if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
+ }
+ genProduceReg(treeNode);
+#else // !0
+ NYI("genCkfinite");
+#endif // !0
+}
+
+int CodeGenInterface::genSPtoFPdelta()
+{
+ int delta;
+
+ // We place the saved frame pointer immediately above the outgoing argument space.
+ delta = (int)compiler->lvaOutgoingArgSpaceSize;
+
+ assert(delta >= 0);
+ return delta;
+}
+
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the total size of the stack frame, including local size,
+// callee-saved register size, etc.
+//
+// Return value:
+// Total frame size
+//
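+// Notes:
+//     As a worked example (illustrative numbers, assuming MAX_REG_ARG == 8 and
+//     REGSIZE_BYTES == 8 on ARM64): a varargs method with compCalleeRegsPushed == 4
+//     and compLclFrameSize == 0x20 yields 0x40 + 0x20 + 0x20 = 0x80 bytes.
+//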
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ // For varargs functions, we home all the incoming register arguments. They are not
+ // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but
+    // since we don't use "push" instructions to save them, we don't have to save
+    // these varargs register arguments as the first thing in the prolog.
+
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) +
+ compiler->compCalleeRegsPushed * REGSIZE_BYTES +
+ compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
+
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
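+//
+// For example (illustrative numbers): with genTotalFrameSize() == 0x80 and
+// lvaOutgoingArgSpaceSize == 0x20, the delta is -0x80 + 0x20 = -0x60.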
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta;
+
+ callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+ callerSPtoSPdelta -= genTotalFrameSize();
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
+
+
+//---------------------------------------------------------------------
+// genMathIntrinsic - generate code for a given math intrinsic
+//
+// Arguments
+// treeNode - the GT_MATH node
+//
+// Return value:
+// None
+//
+void
+CodeGen::genMathIntrinsic(GenTreePtr treeNode)
+{
+#if 0
+ // Right now only Sqrt/Abs are treated as math intrinsics.
+ switch(treeNode->gtMath.gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_sqrtsd, emitTypeSize(treeNode), treeNode, treeNode->gtOp.gtOp1);
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ genSSE2BitwiseOp(treeNode);
+ break;
+
+ default:
+ assert(!"genMathIntrinsic: Unsupported math intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+#else // !0
+ NYI("genMathIntrinsic");
+#endif // !0
+}
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+void
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUG_ARG(void* codePtr))
+{
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUG_ARG(codePtr));
+}
+
+void
+CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUG_ARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC) GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc);
+ assert(gcInfoEncoder != nullptr);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+#if defined(DEBUGGING_SUPPORT)
+ if (compiler->opts.compDbgEnC)
+ {
+        // What we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp),
+        // which is:
+        //  - return address
+        //  - saved-off RBP
+        //  - saved 'this' pointer and a bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ preservedAreaSize += REGSIZE_BYTES;
+
+ preservedAreaSize += 1; // bool for synchronized methods
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+#endif
+
+ gcInfoEncoder->Build();
+
+    // The GC encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
+    // Let's save the values anyway for debugging purposes.
+    compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+    compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ *
+ */
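+
+// For example (an illustrative call site; the helper and sizes depend on context):
+//     genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0 /* argSize */, EA_UNKNOWN /* retSize */);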
+
+void CodeGen::genEmitHelperCall(unsigned helper,
+ int argSize,
+ emitAttr retSize)
+{
+ void* addr = nullptr;
+ void* pAddr = nullptr;
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+
+ if (addr == nullptr)
+ {
+ NYI("genEmitHelperCall indirect");
+#if 0
+ assert(pAddr != nullptr);
+ if (genAddrShouldUsePCRel((size_t)pAddr))
+ {
+ // generate call whose target is specified by PC-relative 32-bit offset.
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+ else
+ {
+ // If this address cannot be encoded as PC-relative 32-bit offset, load it into REG_HELPER_CALL_TARGET
+ // and use register indirect addressing mode to make the call.
+ // mov reg, addr
+ // call [reg]
+ callTarget = callTargetReg;
+ CodeGen::genSetRegToIcon(callTarget, (ssize_t) pAddr, TYP_I_IMPL);
+ callType = emitter::EC_INDIR_ARD;
+ }
+#endif // 0
+ }
+
+ getEmitter()->emitIns_Call(callType,
+ compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr)
+ addr,
+ argSize,
+ retSize,
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, /* IL offset */
+ callTarget, /* ireg */
+ REG_NA, 0, 0, /* xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper));
+
+ regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+ regTracker.rsTrashRegSet(killMask);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genSetScopeInfo (unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo &tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+
+/*****************************************************************************
+ * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
+ * (it's as good a place as any), then use COMPLUS_JitLateDisasm=* to see if the late
+ * disassembler thinks the instructions are the same as we do.
+ */
+
+// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_ARM64_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG)
+void CodeGen::genArm64EmitterUnitTests()
+{
+ if (!verbose)
+ {
+ return;
+ }
+
+ if (!compiler->opts.altJit)
+ {
+ // No point doing this in a "real" JIT.
+ return;
+ }
+
+ // Mark the "fake" instructions in the output.
+ printf("*************** In genArm64EmitterUnitTests()\n");
+
+ emitter* theEmitter = getEmitter();
+
+ // We use this:
+ // genDefineTempLabel(genCreateTempLabel());
+ // to create artificial labels to help separate groups of tests.
+
+ //
+ // Loads/Stores basic general register
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ldr/str Xt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
+
+ // ldr/str Wt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
+
+ theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt
+ theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt
+ theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt
+ theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt
+ theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt
+
+ theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1);
+
+ // SP and ZR tests
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1);
+
+ // scaled
+ theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8);
+
+ // pre-/post-indexed (unscaled)
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // Compares
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // cmp reg, reg
+ theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9);
+
+ // cmp reg, imm
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12);
+
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12);
+
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+    //
+    // R_R
+    //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13);
+ theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14);
+ theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15);
+ theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0);
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1);
+
+ theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2);
+ theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3);
+ theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4);
+ theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
+ theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_I
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // mov reg, imm(i16,hw)
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF);
+
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF);
+
+ // mov reg, imm(N,r,s)
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707);
+
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707);
+
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555);
+
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // tst reg, reg
+ theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10);
+
+ // mov reg, reg
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP);
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9);
+
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13);
+
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13);
+
+ theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt
+ theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt
+
+ theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13);
+ theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_I_I
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // mov reg, imm(i16,hw)
+ theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+
+ theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL);
+
+ theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_I
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
+ theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18);
+ theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37);
+ theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2);
+ theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53);
+ theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18);
+
+ theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555);
+ theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666);
+ theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707);
+ theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000);
+ theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003);
+
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63);
+
+ theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31);
+
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+    // R_R_I cmp/tst
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ // cmp
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
+
+ // CMP (shifted register)
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
+
+ // TST (shifted register)
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR);
+
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR);
+
+ // CMP (extended register)
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis disassembles this "cmp x8,x9", which looks like an msdis issue.
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX);
+
+ // CMP 64-bit (extended register) and left shift
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX);
+
+ // CMP 32-bit (extended register) and left shift
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+
+ theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_I_I
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
+ theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23);
+ theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37);
+ theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21);
+ theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24);
+ theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16);
+ theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19);
+ theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13);
+ theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17);
+ theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11);
+ theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14);
+ theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9);
+ theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R_I
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ADD (extended register)
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
+
+ // ADD (extended register) and left shift
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
+
+ // ADD (shifted register)
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR);
+
+ // EXTR (extract field from register pair)
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63);
+
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31);
+
+ // SUB (extended register)
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
+
+ // SUB (extended register) and left shift
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
+
+ // SUB (shifted register)
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR);
+
+ // bit operations
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
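+
+ // These exercise the logical (shifted register) encodings; the first shifted
+ // EA_8BYTE form should print as "and x8, x9, x10, lsl #1". Shift amounts are
+ // limited to 0..63 for the 64-bit forms and 0..31 for the 32-bit forms.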
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R_I -- load/store pair
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ // SP and ZR tests
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX);
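+
+ // The pair offset is encoded as a signed 7-bit immediate scaled by the register
+ // size, so the expected printing is, e.g., "ldp x8, x9, [x10, #16]" (signed
+ // offset), "ldp x8, x9, [x10], #16" (post-index) and "ldp x8, x9, [x10, #16]!"
+ // (pre-index); byte offsets must be a multiple of the register size.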
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R_Ext -- load/store shifted/extend
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // LDR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+ // STR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
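+
+ // In these register-offset forms the optional shift must equal log2 of the
+ // access size (3, 2 or 1 here), e.g. "ldr x8, [sp, w9, sxtw #3]" and
+ // "strh w8, [sp, x9, lsl #1]"; byte accesses allow extends but no shift.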
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R_R
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
+ theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11);
+ theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12);
+ theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13);
+ theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14);
+ theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15);
+
+ theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19);
+ theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20);
+ theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21);
+ theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22);
+ theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23);
+ theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24);
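+
+ // madd/msub compute Ra +/- (Rn * Rm); the "l" variants widen 32-bit sources
+ // into a 64-bit accumulator, so smaddl/umaddl should print with W sources and
+ // an X destination, e.g. "smaddl x2, w14, w0, x12" for the EA_4BYTE call above.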
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_COND
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // cset reg, cond
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le
+
+ // csetm reg, cond
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le
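+
+ // cset/csetm are aliases of csinc/csinv with both sources ZR and the condition
+ // inverted: "cset x9, eq" yields 1 when the condition holds, "csetm x9, eq"
+ // yields all ones, and both yield 0 otherwise.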
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_COND
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // cinc reg, reg, cond
+ // cinv reg, reg, cond
+ // cneg reg, reg, cond
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le
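+
+ // These are also conditional-select aliases: "cinc x0, x4, eq" returns x4 + 1
+ // when the condition holds and x4 otherwise, with cinv/cneg substituting
+ // bitwise-NOT and negation for the increment.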
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R_COND
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // csel reg, reg, reg, cond
+ // csinc reg, reg, reg, cond
+ // csinv reg, reg, reg, cond
+ // csneg reg, reg, reg, cond
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le
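+
+ // "csel x0, x4, x8, eq" selects the first source when the condition holds and
+ // the second otherwise; csinc/csinv/csneg return the second source incremented,
+ // inverted or negated when the condition fails.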
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_FLAGS_COND
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // ccmp reg1, reg2, nzcv, cond
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmp reg1, imm, nzcv, cond
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmp reg1, imm, nzcv, cond -- encoded as ccmn
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmn reg1, reg2, nzcv, cond
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmn reg1, imm, nzcv, cond
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
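+
+ // ccmp/ccmn perform the comparison only when the condition holds; otherwise
+ // the flags are loaded directly from the 4-bit nzcv immediate. As the comment
+ // above notes, a negative ccmp immediate is re-encoded as the equivalent ccmn
+ // with the negated immediate.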
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // Branch to register
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
+ theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
+ theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
+ theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
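+
+ // Expected printing: "br x8", "blr x9", "ret x8", and plain "ret" for the
+ // canonical return through lr (x30).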
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // Misc
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
+ theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535);
+
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH);
+
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST);
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH);
+
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD);
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
+
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY);
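+
+ // brk accepts a 16-bit immediate (0..65535); the barrier tests cycle each of
+ // dmb/dsb/isb through the full set of domain options, from oshld up to sy.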
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ ////////////////////////////////////////////////////////////////////////////////
+ //
+ // SIMD and Floating point
+ //
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //
+ // Loads/stores of vector registers
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ldr/str Vt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8);
+ theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7);
+ theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6);
+ theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5);
+ theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4);
+ theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3);
+ theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2);
+ theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1);
+ theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0);
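+
+ // The access size selects the SIMD register name, so these should print as
+ // b/h/s/d/q forms, e.g. "ldr d1, [x9]" for EA_8BYTE and "ldr q9, [x1]" for
+ // EA_16BYTE (assumed disassembly).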
+
+ // ldr/str Vt, [reg+cns] -- scaled
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16);
+
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16);
+
+ // ldr/str Vt, [reg],cns -- post-indexed (unscaled)
+ // ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17);
+
+ theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17);
+
+ // load/store pair
+ theEmitter->emitIns_R_R_R (INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24);
+
+ theEmitter->emitIns_R_R_R (INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12);
+
+ theEmitter->emitIns_R_R_R (INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48);
+
+ theEmitter->emitIns_R_R_R (INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R (INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R (INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX);
+
+ // LDR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX);
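+
+ // As with the integer forms, the optional shift must equal log2 of the access
+ // size, which for the 16-byte loads above means, e.g., "ldr q3, [x7, x9, lsl #4]".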
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R mov and its aliases (dup/ins/umov/smov)
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // mov vector to vector
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
+
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17);
+
+ // mov vector to general
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4);
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7);
+
+ // mov general to vector
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4);
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7);
+
+ // mov vector[index] to vector
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15);
+
+ // mov to general from vector[index]
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4);
+
+ // mov to vector[index] from general
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8);
+
+ // mov vector[index] to vector[index2]
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3);
+
+ //////////////////////////////////////////////////////////////////////////////////
+
+ // mov/dup scalar
+ theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15);
+
+ // mov/ins vector element
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4);
+
+ // umov to general from vector element
+ theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8);
+
+ // ins to vector element from general
+ theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15);
+
+ // smov to general from vector element
+ theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2);
+ theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
+ theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);
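+
+ // Most of these are aliases: vector-to-vector mov is orr with identical sources,
+ // and the element forms alias ins/umov, so e.g. "mov x8, v16.d[1]" (from
+ // element) and "mov v20.d[1], x12" (to element) are the expected printings.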
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_I movi and mvni
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // movi imm8 (vector)
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B);
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16
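+
+ // The 16- and 32-bit patterns exercise the "lsl #8/#16/#24" and "msl" shifter
+ // forms, e.g. "movi v9.2s, #0x1f, lsl #8"; a value whose complement fits the
+ // imm8 pattern is expected to re-encode as mvni, per the "// mvni" comments above.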
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_I orr/bic vector immediate
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
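+
+ // Vector orr/bic take the same shifted-imm8 patterns, e.g. the assumed forms
+ // "orr v1.4h, #0x22, lsl #8" and "bic v7.2s, #0x42, lsl #24".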
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_F fcmp/fmov immediate
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // fmov imm8 (scalar)
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31.0); // Largest encodable value
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125);
+
+ // fmov imm8 (vector)
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D);
+
+ // fcmp with 0.0
+ theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0);
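+
+ // The fmov imm8 encoding represents +/- (16..31)/16 * 2^(-3..4), which gives
+ // the 0.125 .. 31.0 bounds noted above; fcmp with 0.0 uses the dedicated
+ // compare-against-zero encoding, the only immediate fcmp supports.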
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R fmov/fcmp/fcvt
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // fmov vector to vector
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
+
+ // fmov vector to general
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5);
+ // using the optional conversion specifier
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE);
+
+ // fmov general to vector
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5);
+ // using the optional conversion specifier
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S);
+
+ // fcmp/fcmpe
+ theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16);
+ theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17);
+ theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18);
+ theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19);
+
+ // fcvt
+ theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double
+ theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single
+
+ theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S);
+ theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D);
+
+ theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H);
+ theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H);
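+
+ // fcvt converts between half, single and double precision; the INS_OPTS_*
+ // specifier names the direction, so INS_OPTS_S_TO_D above should print as
+ // "fcvt d24, s25".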
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R floating point conversions
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // fcvtas scalar
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtas scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtas vector
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtau scalar
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtau scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtau vector
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtms scalar
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtms scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtms vector
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtmu scalar
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtmu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtmu vector
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtns scalar
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtns scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtns vector
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtnu scalar
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtnu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtnu vector
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtps scalar
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtps scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtps vector
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtpu scalar
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtpu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtpu vector
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtzs scalar
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtzs scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtzs vector
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtzu scalar
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtzu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtzu vector
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
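+ // scvtf/ucvtf convert a signed/unsigned integer to floating point, the inverse
+ // direction of the fcvt* conversions above.
+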
+ // scvtf scalar
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3);
+
+ // scvtf scalar from general
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
+
+ // scvtf vector
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // ucvtf scalar
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3);
+
+ // ucvtf scalar from general
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
+
+ // ucvtf vector
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R floating point and integer operations, one dest, one source
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ // fabs scalar
+ theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
+
+ // fabs vector
+ theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // fneg scalar
+ theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3);
+
+ // fneg vector
+ theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // fsqrt scalar
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3);
+
+ // fsqrt vector
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // abs scalar
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3);
+
+ // abs vector
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
+
+ // neg scalar
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3);
+
+ // neg vector
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
+
+ // mvn vector
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5);
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9);
+ theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B);
+
+ // cnt vector
+ theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B);
+
+ // not vector (the same encoding as mvn)
+ theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13);
+ theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17);
+ theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B);
+
+ // cls vector
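+ // (cls counts leading sign bits; cls and clz have no 2D arrangement)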
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+ // clz vector
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+ // rbit vector
+ theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+
+ // rev16 vector
+ theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+
+ // rev32 vector
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+
+ // rev64 vector
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R floating point round to int, one dest, one source
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
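+ // The frint<r> family rounds to an integral floating point value: 'a' = ties away
+ // from zero, 'i' = current FPCR rounding mode, 'm' = toward minus infinity, 'n' =
+ // ties to even, 'p' = toward plus infinity, 'x' = current mode, raising the Inexact
+ // exception when the value changes, 'z' = toward zero.
+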
+ // frinta scalar
+ theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
+
+ // frinta vector
+ theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frinti scalar
+ theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3);
+
+ // frinti vector
+ theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintm scalar
+ theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintm vector
+ theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintn scalar
+ theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintn vector
+ theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintp scalar
+ theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintp vector
+ theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintx scalar
+ theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintx vector
+ theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintz scalar
+ theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintz vector
+ theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R floating point operations, one dest, two sources
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ // fabd (floating-point absolute difference: |Vn - Vm|)
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
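+ // fmulx is like fmul except that (0 * infinity) returns +/-2.0 instead of NaN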
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
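+ // fnmul (scalar only) computes the negated product, -(Vn * Vm)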
+ theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_I vector operations, one dest, one source reg, one immed
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
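+ // For the immediate-shift forms the legal shift amount depends on the element size:
+ // right shifts (sshr/ushr/srshr/urshr/sri and the accumulating *sra forms) take
+ // 1..esize, left shifts (shl/sli) take 0..esize-1.
+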
+ // 'sshr' scalar
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sshr' vector
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ssra' scalar
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ssra' vector
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'srshr' scalar
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'srshr' vector
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'srsra' scalar
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'srsra' vector
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'shl' scalar
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'shl' vector
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ushr' scalar
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ushr' vector
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'usra' scalar
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'usra' vector
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'urshr' scalar
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'urshr' vector
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ursra' scalar
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ursra' vector
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
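+ // sri/sli shift each element and insert the result into the destination, leaving the
+ // destination bits outside the shifted-in range unchanged.
+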
+ // 'sri' scalar
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sri' vector
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sli' scalar
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sli' vector
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
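+ // sshll/ushll sign/zero-extend each element to twice its width and then shift left;
+ // the '2' forms read their source elements from the upper half of the register.
+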
+ // 'sshll' vector
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'ushll' vector
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
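+ // shrn/rshrn shift each element right and narrow it to half width (rshrn rounds);
+ // the '2' forms write their results to the upper half of the destination.
+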
+ // 'shrn' vector
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'rshrn' vector
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
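+ // sxtl/uxtl are aliases for sshll/ushll with a shift amount of zero
+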
+ // 'sxtl' vector
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+ // 'uxtl' vector
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R vector operations, one dest, two sources
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // Specifying an Arrangement is optional
+ //
+ theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20);
+ theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3);
+
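+ // Bitwise selects: bsl uses the destination as the per-bit selector between the two
+ // sources; bit inserts source bits where the second source is 1, bif where it is 0.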
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12);
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21);
+
+ // The same instructions with the default Arrangement specified explicitly, as per the ARM64 manual
+ //
+ theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B);
+
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // saba vector
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sabd vector
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uaba vector
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uabd vector
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R vector multiply
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
+
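+ // For the by-element forms below, the valid index range depends on the element size
+ // (0..7 for H, 0..3 for S), and H-element forms may only index registers V0-V15.
+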
+ // 'mul' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+ // 'mla' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+ // 'mls' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R floating point operations, one dest (also a source), two sources
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ genDefineTempLabel(genCreateTempLabel());
+
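+ // fmla/fmls are fused multiply-accumulate ops: the destination is also a source, and
+ // the product is added to (or subtracted from) it.
+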
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ //
+ // R_R_R_R floating point operations, one dest, three sources
+ //
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
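+ // Scalar fused multiply ops with a separate addend: fmadd = Va + Vn*Vm,
+ // fmsub = Va - Vn*Vm, fnmadd = -Va - Vn*Vm, fnmsub = -Va + Vn*Vm.
+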
+ theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
+ theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
+ theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
+ theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27);
+
+ theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28);
+ theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29);
+ theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30);
+ theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
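+ // Exercise a PC-relative 'adr' to a previously defined label; the nops give the
+ // label a nonzero backward offset from the adr.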
+ BasicBlock* label = genCreateTempLabel();
+ genDefineTempLabel(label);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ printf("*************** End of genArm64EmitterUnitTests()\n");
+}
+#endif // defined(DEBUG)
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND